Merge remote branch 'origin/master' into gallium_draw_llvm

author: Zack Rusin <zackr@vmware.com> 2010-03-15 15:24:38 -0400
committer: Zack Rusin <zackr@vmware.com> 2010-03-15 15:24:38 -0400
commit: 275c4bd3643d773210780cb8d578ca84f2604684 (patch)
tree: 8266edc39d4253ac0f2a0ecd41f560f3d815bb5c /src/mesa/drivers/dri/i965
parent: c5c5cd7132e18f4aad8e73d8ee879f8823c4c1e7 (diff)
parent: d0b35352ed27b1e66785c45ee95a352ed06b47ce (diff)
39 files changed, 2800 insertions, 482 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 7758a792fd..842d4b7aa1 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -54,6 +54,7 @@ DRIVER_SOURCES = \
 	brw_gs_emit.c \
 	brw_gs_state.c \
 	brw_misc_state.c \
+	brw_optimize.c \
 	brw_program.c \
 	brw_queryobj.c \
 	brw_sf.c \
@@ -84,11 +85,21 @@ DRIVER_SOURCES = \
 	brw_wm_pass2.c \
 	brw_wm_sampler_state.c \
 	brw_wm_state.c \
-	brw_wm_surface_state.c 
+	brw_wm_surface_state.c \
+	gen6_cc.c \
+	gen6_clip_state.c \
+	gen6_depthstencil.c \
+	gen6_gs_state.c \
+	gen6_sampler_state.c \
+	gen6_scissor_state.c \
+	gen6_sf_state.c \
+	gen6_urb.c \
+	gen6_viewport_state.c \
+	gen6_vs_state.c \
+	gen6_wm_state.c
 
 C_SOURCES = \
 	$(COMMON_SOURCES) \
-	$(MINIGLX_SOURCES) \
 	$(DRIVER_SOURCES)
 
 ASM_SOURCES = 
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 2ca29b7ae1..241193c357 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -150,12 +150,13 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
       MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
 	   ctx->Const.FragmentProgram.MaxEnvParams);
 
-   if (intel->is_ironlake || intel->is_g4x) {
+   if (intel->is_ironlake || intel->is_g4x || intel->gen >= 6) {
       brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45;
       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
       brw->has_surface_tile_offset = GL_TRUE;
       brw->has_compr4 = GL_TRUE;
       brw->has_aa_line_parameters = GL_TRUE;
+      brw->has_pln = GL_TRUE;
   } else {
       brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
@@ -170,7 +171,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
       brw->urb.size = 384;
       brw->vs_max_threads = 32;
       brw->wm_max_threads = 10 * 5;
-   } else {
+   } else if (intel->gen < 6) {
       brw->urb.size = 256;
       brw->vs_max_threads = 16;
       brw->wm_max_threads = 8 * 4;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 21c4cd38a7..2855c93ea6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -282,6 +282,9 @@ struct brw_vs_ouput_sizes {
 
 
 enum brw_cache_id {
+   BRW_BLEND_STATE,
+   BRW_DEPTH_STENCIL_STATE,
+   BRW_COLOR_CALC_STATE,
    BRW_CC_VP,
    BRW_CC_UNIT,
    BRW_WM_PROG,
@@ -290,7 +293,7 @@ enum brw_cache_id {
    BRW_WM_UNIT,
    BRW_SF_PROG,
    BRW_SF_VP,
-   BRW_SF_UNIT,
+   BRW_SF_UNIT, /* scissor state on gen6 */
    BRW_VS_UNIT,
    BRW_VS_PROG,
    BRW_GS_UNIT,
@@ -354,6 +357,9 @@ struct brw_tracked_state {
 
 /* Flags for brw->state.cache.
  */
+#define CACHE_NEW_BLEND_STATE            (1<<BRW_BLEND_STATE)
+#define CACHE_NEW_DEPTH_STENCIL_STATE    (1<<BRW_DEPTH_STENCIL_STATE)
+#define CACHE_NEW_COLOR_CALC_STATE       (1<<BRW_COLOR_CALC_STATE)
 #define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
 #define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
 #define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
@@ -440,6 +446,7 @@ struct brw_context
    GLboolean has_compr4;
    GLboolean has_negative_rhw_bug;
    GLboolean has_aa_line_parameters;
+   GLboolean has_pln;
 ;
    struct {
       struct brw_state_flags dirty;
@@ -538,7 +545,8 @@ struct brw_context
       GLuint nr_sf_entries;
       GLuint nr_cs_entries;
 
-/*       GLuint vs_size; */
+      /* gen6 */
+      GLuint vs_size;
 /*       GLuint gs_size; */
 /*       GLuint clip_size; */
 /*       GLuint sf_size; */
@@ -643,9 +651,16 @@ struct brw_context
 
 
    struct {
+      /* gen4 */
       dri_bo *prog_bo;
-      dri_bo *state_bo;
       dri_bo *vp_bo;
+
+      /* gen6 */
+      dri_bo *blend_state_bo;
+      dri_bo *depth_stencil_state_bo;
+      dri_bo *color_calc_state_bo;
+
+      dri_bo *state_bo;
    } cc;
 
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 6f2ead793d..4e78b08cfe 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -179,7 +179,6 @@ static GLfloat fixed_plane[6][4] = {
  */
 static void prepare_constant_buffer(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
    GLcontext *ctx = &brw->intel.ctx;
    const struct brw_vertex_program *vp =
       brw_vertex_program_const(brw->vertex_program);
@@ -307,7 +306,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
       if (brw->curbe.curbe_bo != NULL &&
 	  brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
       {
-	 intel_bo_unmap_gtt_preferred(intel, brw->curbe.curbe_bo);
+	 drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
 	 dri_bo_unreference(brw->curbe.curbe_bo);
 	 brw->curbe.curbe_bo = NULL;
       }
@@ -319,7 +318,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
 	 brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
 					    4096, 1 << 6);
 	 brw->curbe.curbe_next_offset = 0;
-	 intel_bo_map_gtt_preferred(intel, brw->curbe.curbe_bo, GL_TRUE);
+	 drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
       }
 
       brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index ea0d7e05d4..984e56d00c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -530,6 +530,7 @@
 #define BRW_OPCODE_POP        47
 #define BRW_OPCODE_WAIT       48
 #define BRW_OPCODE_SEND       49
+#define BRW_OPCODE_MATH       56
 #define BRW_OPCODE_ADD        64
 #define BRW_OPCODE_MUL        65
 #define BRW_OPCODE_AVG        66
@@ -549,6 +550,7 @@
 #define BRW_OPCODE_DP2        87
 #define BRW_OPCODE_DPA2       88
 #define BRW_OPCODE_LINE       89
+#define BRW_OPCODE_PLN        90
 #define BRW_OPCODE_NOP        126
 
 #define BRW_PREDICATE_NONE             0
@@ -727,7 +729,8 @@
 #define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
 #define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
 #define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
-#define BRW_MATH_FUNCTION_TAN                              9
+#define BRW_MATH_FUNCTION_TAN                              9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV                             9 /* gen6+ */
 #define BRW_MATH_FUNCTION_POW                              10
 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
@@ -778,17 +781,33 @@
 
 #define CMD_PIPELINED_STATE_POINTERS  0x7800
 #define CMD_BINDING_TABLE_PTRS        0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS	(1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS	(1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS	(1 << 10)
+
+#define CMD_3D_SAMPLER_STATE_POINTERS			0x7802 /* SNB+ */
+# define PS_SAMPLER_STATE_CHANGE				(1 << 12)
+# define GS_SAMPLER_STATE_CHANGE				(1 << 9)
+# define VS_SAMPLER_STATE_CHANGE				(1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
 
 #define CMD_VERTEX_BUFFER             0x7808
 # define BRW_VB0_INDEX_SHIFT		27
+# define GEN6_VB0_INDEX_SHIFT		26
 # define BRW_VB0_ACCESS_VERTEXDATA	(0 << 26)
 # define BRW_VB0_ACCESS_INSTANCEDATA	(1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA	(0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA	(1 << 20)
 # define BRW_VB0_PITCH_SHIFT		0
 
 #define CMD_VERTEX_ELEMENT            0x7809
 # define BRW_VE0_INDEX_SHIFT		27
+# define GEN6_VE0_INDEX_SHIFT		26
 # define BRW_VE0_FORMAT_SHIFT		16
 # define BRW_VE0_VALID			(1 << 26)
+# define GEN6_VE0_VALID			(1 << 25)
 # define BRW_VE0_SRC_OFFSET_SHIFT	0
 # define BRW_VE1_COMPONENT_NOSTORE	0
 # define BRW_VE1_COMPONENT_STORE_SRC	1
@@ -805,8 +824,219 @@
 # define BRW_VE1_DST_OFFSET_SHIFT	0
 
 #define CMD_INDEX_BUFFER              0x780a
-#define CMD_VF_STATISTICS_965         0x780b
+#define CMD_VF_STATISTICS_965          0x780b
 #define CMD_VF_STATISTICS_GM45        0x680b
+#define CMD_3D_CC_STATE_POINTERS      0x780e /* GEN6+ */
+
+#define CMD_URB					0x7805 /* GEN6+ */
+# define GEN6_URB_VS_SIZE_SHIFT				16
+# define GEN6_URB_VS_ENTRIES_SHIFT			0
+# define GEN6_URB_GS_SIZE_SHIFT				8
+# define GEN6_URB_GS_ENTRIES_SHIFT			0
+
+#define CMD_VIEWPORT_STATE_POINTERS			0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY			(1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY			(1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY			(1 << 10)
+
+#define CMD_3D_SCISSOR_STATE_POINTERS		0x780f /* GEN6+ */
+
+#define CMD_3D_VS_STATE		      0x7810 /* GEN6+ */
+/* DW2 */
+# define GEN6_VS_SPF_MODE				(1 << 31)
+# define GEN6_VS_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT			27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT		20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT			11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT		4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT			25
+# define GEN6_VS_STATISTICS_ENABLE			(1 << 10)
+# define GEN6_VS_CACHE_DISABLE				(1 << 1)
+# define GEN6_VS_ENABLE					(1 << 0)
+
+#define CMD_3D_GS_STATE		      0x7811 /* GEN6+ */
+/* DW2 */
+# define GEN6_GS_SPF_MODE				(1 << 31)
+# define GEN6_GS_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT			27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT			11
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT		4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT		0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT			25
+# define GEN6_GS_STATISTICS_ENABLE			(1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE			(1 << 9)
+# define GEN6_GS_RENDERING_ENABLE			(1 << 8)
+/* DW6 */
+# define GEN6_GS_ENABLE					(1 << 15)
+
+#define CMD_3D_CLIP_STATE		      0x7812 /* GEN6+ */
+/* DW1 */
+# define GEN6_CLIP_STATISTICS_ENABLE			(1 << 10)
+/* DW2 */
+# define GEN6_CLIP_ENABLE				(1 << 31)
+# define GEN6_CLIP_API_OGL				(0 << 30)
+# define GEN6_CLIP_API_D3D				(1 << 30)
+# define GEN6_CLIP_XY_TEST				(1 << 28)
+# define GEN6_CLIP_Z_TEST				(1 << 27)
+# define GEN6_CLIP_GB_TEST				(1 << 26)
+# define GEN6_CLIP_MODE_NORMAL				(0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL			(3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL			(4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE		(1 << 9)
+# define GEN6_CLIP_BARYCENTRIC_ENABLE			(1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT			4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT			2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT			0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT		17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT		6
+
+#define CMD_3D_SF_STATE				0x7813 /* GEN6+ */
+/* DW1 */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT			22
+# define GEN6_SF_SWIZZLE_ENABLE				(1 << 21)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT			(1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT		11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT		4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS		(1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE			(1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID		(1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME		(1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT		(1 << 7)
+# define GEN6_SF_FRONT_SOLID				(0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME			(1 << 5)
+# define GEN6_SF_FRONT_POINT				(2 << 5)
+# define GEN6_SF_BACK_SOLID				(0 << 3)
+# define GEN6_SF_BACK_WIREFRAME				(1 << 3)
+# define GEN6_SF_BACK_POINT				(2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE		(1 << 1)
+# define GEN6_SF_WINDING_CCW				(1 << 0)
+/* DW3 */
+# define GEN6_SF_LINE_AA_ENABLE				(1 << 31)
+# define GEN6_SF_CULL_BOTH				(0 << 29)
+# define GEN6_SF_CULL_NONE				(1 << 29)
+# define GEN6_SF_CULL_FRONT				(2 << 29)
+# define GEN6_SF_CULL_BACK				(3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT			18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5			(0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0			(1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0			(2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0			(3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE				(1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL			(0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN			(1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL			(2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN			(3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT			29
+# define GEN6_SF_LINE_PROVOKE_SHIFT			27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT			25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN			(0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE			(1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS			(0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS			(1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH			(1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT			0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W				(1 << 31)
+# define ATTRIBUTE_1_OVERRIDE_Z				(1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y				(1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X				(1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT			25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT			22
+# define ATTRIBUTE_1_SOURCE_SHIFT			16
+# define ATTRIBUTE_0_OVERRIDE_W				(1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z				(1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y				(1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X				(1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT			9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT			6
+# define ATTRIBUTE_0_SOURCE_SHIFT			0
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-16 wrap shortest enables */
+
+#define CMD_3D_WM_STATE		      0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE				(1 << 31)
+# define GEN6_WM_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT			27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE			(1 << 31)
+# define GEN6_WM_DEPTH_CLEAR				(1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE				(1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE		(1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0		16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1		8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2		0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT			25
+# define GEN6_WM_KILL_ENABLE				(1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH				(1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH			(1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE			(1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5		(0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0		(1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0		(2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0		(3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5			(0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0			(1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0			(2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0			(3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE			(1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE			(1 << 12)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET			(1 << 9)
+# define GEN6_WM_USES_SOURCE_W				(1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE		(1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE			(1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE			(1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE			(1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT			20
+# define GEN6_WM_POSOFFSET_NONE				(0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID			(2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE			(3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL			(0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID			(2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE			(3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT		(1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL			(0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN			(1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL			(2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN			(3 << 1)
+# define GEN6_WM_MSDISPMODE_PERPIXEL			(1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+#define CMD_3D_CONSTANT_VS_STATE	      0x7815 /* GEN6+ */
+#define CMD_3D_CONSTANT_GS_STATE	      0x7816 /* GEN6+ */
+#define CMD_3D_CONSTANT_PS_STATE	      0x7817 /* GEN6+ */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE			(1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE			(1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE			(1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE			(1 << 12)
+
+#define CMD_3D_SAMPLE_MASK			0x7818 /* GEN6+ */
 
 #define CMD_DRAW_RECT                 0x7900
 #define CMD_BLEND_CONSTANT_COLOR      0x7901
@@ -818,6 +1048,25 @@
 #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
 #define CMD_AA_LINE_PARAMETERS        0x790a
 
+#define CMD_GS_SVB_INDEX			0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT				29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT			(1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define CMD_3D_MULTISAMPLE			0x790d /* SNB+ */
+/* DW1 */
+# define MS_PIXEL_LOCATION_CENTER			(0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT			(1 << 4)
+# define MS_NUMSAMPLES_1				(0 << 1)
+# define MS_NUMSAMPLES_4				(2 << 1)
+# define MS_NUMSAMPLES_8				(3 << 1)
+
+#define CMD_3D_CLEAR_PARAMS			0x7910 /* ILK+ */
+# define DEPTH_CLEAR_VALID				(1 << 15)
+/* DW1: depth clear value */
+
 #define CMD_PIPE_CONTROL              0x7a00
 
 #define CMD_3D_PRIM                   0x7b00
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index a8f6b993ac..ad61770212 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -50,6 +50,7 @@ struct {
     [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
     [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
@@ -73,10 +74,10 @@ struct {
     [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
     [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
-    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
-    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
-    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
     [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
     [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 106454de4a..71a43577bf 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -276,7 +276,6 @@ copy_array_to_vbo_array( struct brw_context *brw,
 			 struct brw_vertex_element *element,
 			 GLuint dst_stride)
 {
-   struct intel_context *intel = &brw->intel;
    GLuint size = element->count * dst_stride;
 
    get_space(brw, size, &element->bo, &element->offset);
@@ -289,52 +288,26 @@ copy_array_to_vbo_array( struct brw_context *brw,
    }
 
    if (dst_stride == element->glarray->StrideB) {
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(element->bo);
-	 memcpy((char *)element->bo->virtual + element->offset,
-		element->glarray->Ptr, size);
-	 drm_intel_gem_bo_unmap_gtt(element->bo);
-      } else {
-	 dri_bo_subdata(element->bo,
-			element->offset,
-			size,
-			element->glarray->Ptr);
-      }
+      drm_intel_gem_bo_map_gtt(element->bo);
+      memcpy((char *)element->bo->virtual + element->offset,
+	     element->glarray->Ptr, size);
+      drm_intel_gem_bo_unmap_gtt(element->bo);
    } else {
       char *dest;
       const unsigned char *src = element->glarray->Ptr;
       int i;
 
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(element->bo);
-	 dest = element->bo->virtual;
-	 dest += element->offset;
-
-	 for (i = 0; i < element->count; i++) {
-	    memcpy(dest, src, dst_stride);
-	    src += element->glarray->StrideB;
-	    dest += dst_stride;
-	 }
-
-	 drm_intel_gem_bo_unmap_gtt(element->bo);
-      } else {
-	 void *data;
-
-	 data = malloc(dst_stride * element->count);
-	 dest = data;
-	 for (i = 0; i < element->count; i++) {
-	    memcpy(dest, src, dst_stride);
-	    src += element->glarray->StrideB;
-	    dest += dst_stride;
-	 }
+      drm_intel_gem_bo_map_gtt(element->bo);
+      dest = element->bo->virtual;
+      dest += element->offset;
 
-	 dri_bo_subdata(element->bo,
-			element->offset,
-			size,
-			data);
-
-	 free(data);
+      for (i = 0; i < element->count; i++) {
+	 memcpy(dest, src, dst_stride);
+	 src += element->glarray->StrideB;
+	 dest += dst_stride;
       }
+
+      drm_intel_gem_bo_unmap_gtt(element->bo);
    }
 }
 
@@ -503,10 +476,17 @@ static void brw_emit_vertices(struct brw_context *brw)
    if (brw->vb.nr_enabled == 0) {
       BEGIN_BATCH(3);
       OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
-      OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
-		BRW_VE0_VALID |
-		(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
-		(0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      if (IS_GEN6(intel->intelScreen->deviceID)) {
+	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
+		   GEN6_VE0_VALID |
+		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      } else {
+	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+		   BRW_VE0_VALID |
+		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      }
       OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
 		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
 		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
@@ -527,14 +507,22 @@ static void brw_emit_vertices(struct brw_context *brw)
 
    for (i = 0; i < brw->vb.nr_enabled; i++) {
       struct brw_vertex_element *input = brw->vb.enabled[i];
+      uint32_t dw0;
+
+      if (intel->gen >= 6) {
+	 dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
+	    (i << GEN6_VB0_INDEX_SHIFT);
+      } else {
+	 dw0 = BRW_VB0_ACCESS_VERTEXDATA |
+	    (i << BRW_VB0_INDEX_SHIFT);
+      }
 
-      OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
-		BRW_VB0_ACCESS_VERTEXDATA |
+      OUT_BATCH(dw0 |
 		(input->stride << BRW_VB0_PITCH_SHIFT));
       OUT_RELOC(input->bo,
 		I915_GEM_DOMAIN_VERTEX, 0,
 		input->offset);
-      if (intel->is_ironlake) {
+      if (intel->is_ironlake || intel->gen >= 6) {
 	 OUT_RELOC(input->bo,
 		   I915_GEM_DOMAIN_VERTEX, 0,
 		   input->bo->size - 1);
@@ -565,12 +553,19 @@ static void brw_emit_vertices(struct brw_context *brw)
 	 break;
       }
 
-      OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
-		BRW_VE0_VALID |
-		(format << BRW_VE0_FORMAT_SHIFT) |
-		(0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      if (IS_GEN6(intel->intelScreen->deviceID)) {
+	 OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) |
+		   GEN6_VE0_VALID |
+		   (format << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      } else {
+	 OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
+		   BRW_VE0_VALID |
+		   (format << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      }
 
-      if (intel->is_ironlake)
+      if (intel->is_ironlake || intel->gen >= 6)
           OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                     (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                     (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
@@ -624,13 +619,9 @@ static void brw_prepare_indices(struct brw_context *brw)
 
       /* Straight upload
        */
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(bo);
-	 memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
-	 drm_intel_gem_bo_unmap_gtt(bo);
-      } else {
-	 dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
-      }
+      drm_intel_gem_bo_map_gtt(bo);
+      memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+      drm_intel_gem_bo_unmap_gtt(bo);
    } else {
       offset = (GLuint) (unsigned long) index_buffer->ptr;
       brw->ib.start_vertex_offset = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 39eb88d7c2..4f55158e8f 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -795,6 +795,7 @@ ALU2(DPH)
 ALU2(DP3)
 ALU2(DP2)
 ALU2(LINE)
+ALU2(PLN)
 
 #undef ALU1
 #undef ALU2
@@ -965,4 +966,9 @@ void brw_math_invert( struct brw_compile *p,
 
 void brw_set_src1( struct brw_instruction *insn,
                           struct brw_reg reg );
+
+
+/* brw_optimize.c */
+void brw_optimize(struct brw_compile *p);
+
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index b832c7165d..d2395dec28 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -102,8 +102,6 @@ static void brw_set_dest( struct brw_instruction *insn,
 static void brw_set_src0( struct brw_instruction *insn,
                           struct brw_reg reg )
 {
-   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
-
    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
@@ -323,7 +321,7 @@ static void brw_set_urb_message( struct brw_context *brw,
     struct intel_context *intel = &brw->intel;
     brw_set_src1(insn, brw_imm_d(0));
 
-    if (intel->is_ironlake) {
+    if (intel->is_ironlake || intel->gen >= 6) {
         insn->bits3.urb_igdng.opcode = 0;	/* ? */
         insn->bits3.urb_igdng.offset = offset;
         insn->bits3.urb_igdng.swizzle_control = swizzle_control;
@@ -334,8 +332,16 @@ static void brw_set_urb_message( struct brw_context *brw,
         insn->bits3.urb_igdng.response_length = response_length;
         insn->bits3.urb_igdng.msg_length = msg_length;
         insn->bits3.urb_igdng.end_of_thread = end_of_thread;
-        insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
-        insn->bits2.send_igdng.end_of_thread = end_of_thread;
+	if (intel->gen >= 6) {
+	   /* For SNB, the SFID bits moved to the condmod bits, and
+	    * EOT stayed in bits3 above.  Does the EOT bit setting
+	    * below on Ironlake even do anything?
+	    */
+	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
+	} else {
+	   insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+	   insn->bits2.send_igdng.end_of_thread = end_of_thread;
+	}
     } else {
         insn->bits3.urb.opcode = 0;	/* ? */
         insn->bits3.urb.offset = offset;
@@ -567,7 +573,7 @@ ALU2(DPH)
 ALU2(DP3)
 ALU2(DP2)
 ALU2(LINE)
-
+ALU2(PLN)
 
 
 
@@ -917,26 +923,40 @@ void brw_math( struct brw_compile *p,
 	       GLuint data_type,
 	       GLuint precision )
 {
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
-   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
+   struct intel_context *intel = &p->brw->intel;
 
-   /* Example code doesn't set predicate_control for send
-    * instructions.
-    */
-   insn->header.predicate_control = 0; 
-   insn->header.destreg__conditionalmod = msg_reg_nr;
+   if (intel->gen >= 6) {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
 
-   brw_set_dest(insn, dest);
-   brw_set_src0(insn, src);
-   brw_set_math_message(p->brw,
-			insn, 
-			msg_length, response_length, 
-			function,
-			BRW_MATH_INTEGER_UNSIGNED,
-			precision,
-			saturate,
-			data_type);
+      /* Math is the same ISA format as other opcodes, except that CondModifier
+       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+       */
+      insn->header.destreg__conditionalmod = function;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src);
+      brw_set_src1(insn, brw_null_reg());
+   } else {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+      /* Example code doesn't set predicate_control for send
+       * instructions.
+       */
+      insn->header.predicate_control = 0;
+      insn->header.destreg__conditionalmod = msg_reg_nr;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src);
+      brw_set_math_message(p->brw,
+			   insn,
+			   msg_length, response_length,
+			   function,
+			   BRW_MATH_INTEGER_UNSIGNED,
+			   precision,
+			   saturate,
+			   data_type);
+   }
 }
 
 /**
@@ -1270,7 +1290,7 @@ void brw_SAMPLE(struct brw_compile *p,
 		GLuint simd_mode)
 {
    GLboolean need_stall = 0;
-   
+
    if (writemask == 0) {
       /*printf("%s: zero writemask??\n", __FUNCTION__); */
       return;
@@ -1307,8 +1327,14 @@ void brw_SAMPLE(struct brw_compile *p,
          /* printf("need stall %x %x\n", newmask , writemask); */
       }
       else {
+	 GLboolean dispatch_16 = GL_FALSE;
+
 	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
-	 
+
+	 guess_execution_size(p->current, dest);
+	 if (p->current->header.execution_size == BRW_EXECUTE_16)
+	    dispatch_16 = GL_TRUE;
+
 	 newmask = ~newmask & WRITEMASK_XYZW;
 
 	 brw_push_insn_state(p);
@@ -1323,7 +1349,13 @@ void brw_SAMPLE(struct brw_compile *p,
 
   	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
 	 dest = offset(dest, dst_offset);
-	 response_length = len * 2;
+
+	 /* For 16-wide dispatch, masked channels are skipped in the
+	  * response.  For 8-wide, masked channels still take up slots,
+	  * and are just not written to.
+	  */
+	 if (dispatch_16)
+	    response_length = len * 2;
       }
    }
 
@@ -1377,7 +1409,18 @@ void brw_urb_WRITE(struct brw_compile *p,
 		   GLuint offset,
 		   GLuint swizzle)
 {
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_instruction *insn;
+
+   /* Sandybridge doesn't have the implied move for SENDs,
+    * and the first message register index comes from src0.
+    */
+   if (intel->gen >= 6) {
+      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
+      src0 = brw_message_reg(msg_reg_nr);
+   }
+
+   insn = next_insn(p, BRW_OPCODE_SEND);
 
    assert(msg_length < BRW_MAX_MRF);
 
@@ -1385,7 +1428,8 @@ void brw_urb_WRITE(struct brw_compile *p,
    brw_set_src0(insn, src0);
    brw_set_src1(insn, brw_imm_d(0));
 
-   insn->header.destreg__conditionalmod = msg_reg_nr;
+   if (intel->gen < 6)
+      insn->header.destreg__conditionalmod = msg_reg_nr;
 
    brw_set_urb_message(p->brw,
 		       insn,
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index f708ee0063..d030ed41f4 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -136,6 +136,41 @@ const struct brw_tracked_state brw_binding_table_pointers = {
    .emit = upload_binding_table_pointers,
 };
 
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
+ */
+static void upload_gen6_binding_table_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 |
+	     GEN6_BINDING_TABLE_MODIFY_VS |
+	     GEN6_BINDING_TABLE_MODIFY_GS |
+	     GEN6_BINDING_TABLE_MODIFY_PS |
+	     (4 - 2));
+   if (brw->vs.bind_bo != NULL)
+      OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+   else
+      OUT_BATCH(0);
+   OUT_BATCH(0); /* gs */
+   OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen6_binding_table_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SURF_BIND,
+   },
+   .prepare = prepare_binding_table_pointers,
+   .emit = upload_gen6_binding_table_pointers,
+};
 
 /**
  * Upload pointers to the per-stage state.
@@ -209,7 +244,14 @@ static void emit_depthbuffer(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
    struct intel_region *region = brw->state.depth_region;
-   unsigned int len = (intel->is_g4x || intel->is_ironlake) ? 6 : 5;
+   unsigned int len;
+
+   if (intel->gen >= 6)
+      len = 7;
+   else if (intel->is_g4x || intel->is_ironlake)
+      len = 6;
+   else
+      len = 5;
 
    if (region == NULL) {
       BEGIN_BATCH(len);
@@ -220,9 +262,12 @@ static void emit_depthbuffer(struct brw_context *brw)
       OUT_BATCH(0);
       OUT_BATCH(0);
 
-      if (intel->is_g4x || intel->is_ironlake)
+      if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6)
          OUT_BATCH(0);
 
+      if (intel->gen >= 6)
+	 OUT_BATCH(0);
+
       ADVANCE_BATCH();
    } else {
       unsigned int format;
@@ -243,6 +288,8 @@ static void emit_depthbuffer(struct brw_context *brw)
       }
 
       assert(region->tiling != I915_TILING_X);
+      if (IS_GEN6(intel->intelScreen->deviceID))
+	 assert(region->tiling != I915_TILING_NONE);
 
       BEGIN_BATCH(len);
       OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
@@ -259,9 +306,20 @@ static void emit_depthbuffer(struct brw_context *brw)
 		((region->height - 1) << 19));
       OUT_BATCH(0);
 
-      if (intel->is_g4x || intel->is_ironlake)
+      if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6)
          OUT_BATCH(0);
 
+      if (intel->gen >= 6)
+	 OUT_BATCH(0);
+
+      ADVANCE_BATCH();
+   }
+
+   /* Initialize it for safety. */
+   if (intel->gen >= 6) {
+      BEGIN_BATCH(2);
+      OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2));
+      OUT_BATCH(0);
       ADVANCE_BATCH();
    }
 }
@@ -435,6 +493,8 @@ const struct brw_tracked_state brw_line_stipple = {
 
 static void upload_invarient_state( struct brw_context *brw )
 {
+   struct intel_context *intel = &brw->intel;
+
    {
       /* 0x61040000  Pipeline Select */
       /*     PipelineSelect            : 0 */
@@ -446,7 +506,7 @@ static void upload_invarient_state( struct brw_context *brw )
       BRW_BATCH_STRUCT(brw, &ps);
    }
 
-   {
+   if (intel->gen < 6) {
       struct brw_global_depth_offset_clamp gdo;
       memset(&gdo, 0, sizeof(gdo));
 
@@ -459,6 +519,32 @@ static void upload_invarient_state( struct brw_context *brw )
       BRW_BATCH_STRUCT(brw, &gdo);
    }
 
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   if (intel->gen >= 6) {
+      int i;
+
+      BEGIN_BATCH(3);
+      OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2));
+      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
+		MS_NUMSAMPLES_1);
+      OUT_BATCH(0); /* positions for 4/8-sample */
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2));
+      OUT_BATCH(1);
+      ADVANCE_BATCH();
+
+      for (i = 0; i < 4; i++) {
+	 BEGIN_BATCH(4);
+	 OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2));
+	 OUT_BATCH(i << SVB_INDEX_SHIFT);
+	 OUT_BATCH(0);
+	 OUT_BATCH(0xffffffff);
+	 ADVANCE_BATCH();
+      }
+   }
 
    /* 0x61020000  State Instruction Pointer */
    {
@@ -509,7 +595,20 @@ static void upload_state_base_address( struct brw_context *brw )
    /* Output the structure (brw_state_base_address) directly to the
     * batchbuffer, so we can emit relocations inline.
     */
-   if (intel->is_ironlake) {
+   if (intel->gen >= 6) {
+       BEGIN_BATCH(10);
+       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
+       OUT_BATCH(1); /* General state base address */
+       OUT_BATCH(1); /* Surface state base address */
+       OUT_BATCH(1); /* Dynamic state base address */
+       OUT_BATCH(1); /* Indirect object base address */
+       OUT_BATCH(1); /* Instruction base address */
+       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(1); /* Dynamic state upper bound */
+       OUT_BATCH(1); /* Indirect object upper bound */
+       OUT_BATCH(1); /* Instruction access upper bound */
+       ADVANCE_BATCH();
+   } else if (intel->is_ironlake) {
        BEGIN_BATCH(8);
        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
        OUT_BATCH(1); /* General state base address */
diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c
new file mode 100644
index 0000000000..57df9ea115
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_optimize.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/macros.h"
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+static GLboolean
+is_single_channel_dp4(struct brw_instruction *insn)
+{
+   if (insn->header.opcode != BRW_OPCODE_DP4 ||
+       insn->header.execution_size != BRW_EXECUTE_8 ||
+       insn->header.access_mode != BRW_ALIGN_16 ||
+       insn->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
+      return GL_FALSE;
+
+   if (!is_power_of_two(insn->bits1.da16.dest_writemask))
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/**
+ * Sets the dependency control fields on DP4 instructions.
+ *
+ * The hardware only tracks dependencies on a register basis, so when
+ * you do:
+ *
+ * DP4 dst.x src1 src2
+ * DP4 dst.y src1 src3
+ * DP4 dst.z src1 src4
+ * DP4 dst.w src1 src5
+ *
+ * It will wait to do the DP4 dst.y until the dst.x is resolved, etc.
+ * We can examine our instruction stream and set the dependency
+ * control fields to tell the hardware when to do it.
+ *
+ * We may want to extend this to other instructions that are used to
+ * fill in a channel at a time of the destination register.
+ */
+static void
+brw_set_dp4_dependency_control(struct brw_compile *p)
+{
+   int i;
+
+   for (i = 1; i < p->nr_insn; i++) {
+      struct brw_instruction *insn = &p->store[i];
+      struct brw_instruction *prev = &p->store[i - 1];
+
+      if (!is_single_channel_dp4(prev))
+	 continue;
+
+      if (!is_single_channel_dp4(insn)) {
+	 i++;
+	 continue;
+      }
+
+      /* Only avoid hw dep control if the write masks are different
+       * channels of one reg.
+       */
+      if (insn->bits1.da16.dest_writemask == prev->bits1.da16.dest_writemask)
+	 continue;
+      if (insn->bits1.da16.dest_reg_nr != prev->bits1.da16.dest_reg_nr)
+	 continue;
+
+      /* Check if the second instruction depends on the previous one
+       * for a src.
+       */
+      if (insn->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE &&
+	  (insn->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
+	   insn->bits2.da1.src0_reg_nr == insn->bits1.da16.dest_reg_nr))
+	  continue;
+      if (insn->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE &&
+	  (insn->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT ||
+	   insn->bits3.da1.src1_reg_nr == insn->bits1.da16.dest_reg_nr))
+	  continue;
+
+      prev->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
+      insn->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
+   }
+}
+
+void
+brw_optimize(struct brw_compile *p)
+{
+   brw_set_dp4_dependency_control(p);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index c78f7b38ae..1fd957b3ad 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -95,9 +95,17 @@ static void brwDeleteProgram( GLcontext *ctx,
 			      struct gl_program *prog )
 {
    if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
-      dri_bo_unreference(brw_fprog->const_buffer);
+      struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *brw_fp = brw_fragment_program(fp);
+
+      dri_bo_unreference(brw_fp->const_buffer);
+   }
+
+   if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
+      struct brw_vertex_program *brw_vp = brw_vertex_program(vp);
+
+      dri_bo_unreference(brw_vp->const_buffer);
    }
 
    _mesa_delete_program( ctx, prog );
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 8e6839b812..57d1c29ade 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -46,7 +46,6 @@
 static void compile_sf_prog( struct brw_context *brw,
 			     struct brw_sf_prog_key *key )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_sf_compile c;
    const GLuint *program;
    GLuint program_size;
@@ -69,20 +68,14 @@ static void compile_sf_prog( struct brw_context *brw,
 
    /* Construct map from attribute number to position in the vertex.
     */
-   for (i = idx = 0; i < VERT_RESULT_MAX; i++) 
+   for (i = idx = 0; i < VERT_RESULT_MAX; i++) {
       if (c.key.attrs & BITFIELD64_BIT(i)) {
 	 c.attr_to_idx[i] = idx;
 	 c.idx_to_attr[idx] = i;
-	 if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
-            c.point_attrs[i].CoordReplace = 
-               ctx->Point.CoordReplace[i - VERT_RESULT_TEX0];
-	 }
-         else {
-            c.point_attrs[i].CoordReplace = GL_FALSE;
-         }
 	 idx++;
       }
-   
+   }
+
    /* Which primitive?  Or all three? 
     */
    switch (key->primitive) {
@@ -162,6 +155,14 @@ static void upload_sf_prog(struct brw_context *brw)
    }
 
    key.do_point_sprite = ctx->Point.PointSprite;
+   if (key.do_point_sprite) {
+      int i;
+
+      for (i = 0; i < 8; i++) {
+	 if (ctx->Point.CoordReplace[i])
+	    key.point_sprite_coord_replace |= (1 << i);
+      }
+   }
    key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT);
    /* _NEW_LIGHT */
    key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
index 0ba731fac9..a0680a56f2 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -46,6 +46,7 @@
 
 struct brw_sf_prog_key {
    GLbitfield64 attrs;
+   uint8_t point_sprite_coord_replace;
    GLuint primitive:2;
    GLuint do_twoside_color:1;
    GLuint do_flat_shading:1;
@@ -56,10 +57,6 @@ struct brw_sf_prog_key {
    GLuint pad:24;
 };
 
-struct brw_sf_point_tex {
-   GLboolean CoordReplace;	
-};
-
 struct brw_sf_compile {
    struct brw_compile func;
    struct brw_sf_prog_key key;
@@ -100,7 +97,6 @@ struct brw_sf_compile {
 
    GLubyte attr_to_idx[VERT_RESULT_MAX];   
    GLubyte idx_to_attr[VERT_RESULT_MAX];   
-   struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
 };
 
  
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index bb08055e3b..56f7c986e7 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -354,6 +354,33 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
    return is_last_attr;
 }
 
+/* Calculates the predicate control for which channels of a reg
+ * (containing 2 attrs) to do point sprite coordinate replacement on.
+ */
+static uint16_t
+calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
+{
+   int attr1, attr2;
+   uint16_t pc = 0;
+
+   attr1 = c->idx_to_attr[reg * 2];
+   if (attr1 >= VERT_RESULT_TEX0 && attr1 <= VERT_RESULT_TEX7) {
+      if (c->key.point_sprite_coord_replace & (1 << (attr1 - VERT_RESULT_TEX0)))
+	 pc |= 0x0f;
+   }
+
+   if (reg * 2 + 1 < c->nr_setup_attrs) {
+       attr2 = c->idx_to_attr[reg * 2 + 1];
+       if (attr2 >= VERT_RESULT_TEX0 && attr2 <= VERT_RESULT_TEX7) {
+	  if (c->key.point_sprite_coord_replace & (1 << (attr2 -
+							 VERT_RESULT_TEX0)))
+	     pc |= 0xf0;
+       }
+   }
+
+   return pc;
+}
+
 
 
 void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
@@ -529,22 +556,27 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
    copy_z_inv_w(c);
    for (i = 0; i < c->nr_setup_regs; i++)
    {
-      struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
       struct brw_reg a0 = offset(c->vert[0], i);
-      GLushort pc, pc_persp, pc_linear;
+      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
       GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
-            
-      if (pc_persp)
-      {				
-	  if (!tex->CoordReplace) {
-	      brw_set_predicate_control_flag_value(p, pc_persp);
-	      brw_MUL(p, a0, a0, c->inv_w[0]);
-	  }
+
+      pc_coord_replace = calculate_point_sprite_mask(c, i);
+      pc_persp &= ~pc_coord_replace;
+
+      if (pc_persp) {
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
       }
 
-      if (tex->CoordReplace) {
-	  /* Caculate 1.0/PointWidth */
-	  brw_math(&c->func,
+      /* Point sprite coordinate replacement: A texcoord with this
+       * enabled gets replaced with the value (x, y, 0, 1) where x and
+       * y vary from 0 to 1 across the horizontal and vertical of the
+       * point.
+       */
+      if (pc_coord_replace) {
+	 brw_set_predicate_control_flag_value(p, pc_coord_replace);
+	 /* Caculate 1.0/PointWidth */
+	 brw_math(&c->func,
 		  c->tmp,
 		  BRW_MATH_FUNCTION_INV,
 		  BRW_MATH_SATURATE_NONE,
@@ -553,50 +585,51 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
 		  BRW_MATH_DATA_SCALAR,
 		  BRW_MATH_PRECISION_FULL);
 
-	  if (c->key.sprite_origin_lower_left) {
-	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
-		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
-	  	brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
-		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
-	  } else {
-	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
-		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
-	  	brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
-		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
-	  }
-      } else {
-	  brw_MOV(p, c->m1Cx, brw_imm_ud(0));
-	  brw_MOV(p, c->m2Cy, brw_imm_ud(0));
-      }
+	 brw_set_access_mode(p, BRW_ALIGN_16);
 
-      {
-	 brw_set_predicate_control_flag_value(p, pc); 
-	 if (tex->CoordReplace) {
-	     if (c->key.sprite_origin_lower_left) {
-		 brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
-		 brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
-	     }
-	     else
-		 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+	 /* dA/dx, dA/dy */
+	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
+	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
+	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
+	 if (c->key.sprite_origin_lower_left) {
+	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
 	 } else {
-	 	brw_MOV(p, c->m3C0, a0); /* constant value */
+	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
 	 }
 
-	 /* Copy m0..m3 to URB. 
-	  */
-	 brw_urb_WRITE(p, 
-		       brw_null_reg(),
-		       0,
-		       brw_vec8_grf(0, 0),
-		       0, 	/* allocate */
-		       1,	/* used */
-		       4, 	/* msg len */
-		       0,	/* response len */
-		       last, 	/* eot */
-		       last, 	/* writes complete */
-		       i*4,	/* urb destination offset */
-		       BRW_URB_SWIZZLE_TRANSPOSE);
+	 /* attribute constant offset */
+	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+	 if (c->key.sprite_origin_lower_left) {
+	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
+	 } else {
+	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
+	 }
+
+	 brw_set_access_mode(p, BRW_ALIGN_1);
       }
+
+      if (pc & ~pc_coord_replace) {
+	 brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
+	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+	 brw_MOV(p, c->m3C0, a0); /* constant value */
+      }
+
+
+      brw_set_predicate_control_flag_value(p, pc);
+      /* Copy m0..m3 to URB. */
+      brw_urb_WRITE(p,
+		    brw_null_reg(),
+		    0,
+		    brw_vec8_grf(0, 0),
+		    0, 	/* allocate */
+		    1,	/* used */
+		    4, 	/* msg len */
+		    0,	/* response len */
+		    last, 	/* eot */
+		    last, 	/* writes complete */
+		    i*4,	/* urb destination offset */
+		    BRW_URB_SWIZZLE_TRANSPOSE);
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 536fe8b249..f790cfabe2 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -90,6 +90,23 @@ const struct brw_tracked_state brw_drawing_rect;
 const struct brw_tracked_state brw_indices;
 const struct brw_tracked_state brw_vertices;
 const struct brw_tracked_state brw_index_buffer;
+const struct brw_tracked_state gen6_binding_table_pointers;
+const struct brw_tracked_state gen6_blend_state;
+const struct brw_tracked_state gen6_cc_state_pointers;
+const struct brw_tracked_state gen6_cc_vp;
+const struct brw_tracked_state gen6_clip_state;
+const struct brw_tracked_state gen6_clip_vp;
+const struct brw_tracked_state gen6_color_calc_state;
+const struct brw_tracked_state gen6_depth_stencil_state;
+const struct brw_tracked_state gen6_gs_state;
+const struct brw_tracked_state gen6_sampler_state;
+const struct brw_tracked_state gen6_scissor_state;
+const struct brw_tracked_state gen6_sf_state;
+const struct brw_tracked_state gen6_sf_vp;
+const struct brw_tracked_state gen6_urb;
+const struct brw_tracked_state gen6_viewport_state;
+const struct brw_tracked_state gen6_vs_state;
+const struct brw_tracked_state gen6_wm_state;
 
 /**
  * Use same key for WM and VS surfaces.
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 4bb98d8d5d..c08cb45b75 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -390,6 +390,7 @@ brw_init_non_surface_cache(struct brw_context *brw)
    brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT);
 
    brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG);
+   brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 4f477cfc6b..9e54f29f0f 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -35,8 +35,15 @@
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
 #include "intel_buffers.h"
+#include "intel_chipset.h"
 
-static const struct brw_tracked_state *atoms[] =
+/* This is used to initialize brw->state.atoms[].  We could use this
+ * list directly except for a single atom, brw_constant_buffer, which
+ * has a .dirty value which changes according to the parameters of the
+ * current fragment and vertex programs, and so cannot be a static
+ * value.
+ */
+static const struct brw_tracked_state *gen4_atoms[] =
 {
    &brw_check_fallback,
 
@@ -95,6 +102,63 @@ static const struct brw_tracked_state *atoms[] =
    &brw_constant_buffer
 };
 
+const struct brw_tracked_state *gen6_atoms[] =
+{
+   &brw_check_fallback,
+
+   &brw_wm_input_sizes,
+   &brw_vs_prog,
+   &brw_gs_prog,
+   &brw_wm_prog,
+
+   &gen6_clip_vp,
+   &gen6_sf_vp,
+   &gen6_cc_vp,
+
+   /* Command packets: */
+   &brw_invarient_state,
+
+   &gen6_viewport_state,	/* must do after *_vp stages */
+
+   &gen6_urb,
+   &gen6_blend_state,		/* must do before cc unit */
+   &gen6_color_calc_state,	/* must do before cc unit */
+   &gen6_depth_stencil_state,	/* must do before cc unit */
+   &gen6_cc_state_pointers,
+
+   &brw_vs_surfaces,		/* must do before unit */
+   &brw_wm_constant_surface,	/* must do before wm surfaces/bind bo */
+   &brw_wm_surfaces,		/* must do before samplers and unit */
+
+   &brw_wm_samplers,
+   &gen6_sampler_state,
+
+   &gen6_vs_state,
+   &gen6_gs_state,
+   &gen6_clip_state,
+   &gen6_sf_state,
+   &gen6_wm_state,
+
+   &gen6_scissor_state,
+
+   &brw_state_base_address,
+
+   &gen6_binding_table_pointers,
+
+   &brw_depthbuffer,
+
+   &brw_polygon_stipple,
+   &brw_polygon_stipple_offset,
+
+   &brw_line_stipple,
+   &brw_aa_line_parameters,
+
+   &brw_drawing_rect,
+
+   &brw_indices,
+   &brw_index_buffer,
+   &brw_vertices,
+};
 
 void brw_init_state( struct brw_context *brw )
 {
@@ -211,6 +275,7 @@ static struct dirty_bit_map brw_bits[] = {
 };
 
 static struct dirty_bit_map cache_bits[] = {
+   DEFINE_BIT(CACHE_NEW_BLEND_STATE),
    DEFINE_BIT(CACHE_NEW_CC_VP),
    DEFINE_BIT(CACHE_NEW_CC_UNIT),
    DEFINE_BIT(CACHE_NEW_WM_PROG),
@@ -270,6 +335,8 @@ void brw_validate_state( struct brw_context *brw )
    struct intel_context *intel = &brw->intel;
    struct brw_state_flags *state = &brw->state.dirty;
    GLuint i;
+   const struct brw_tracked_state **atoms;
+   int num_atoms;
 
    brw_clear_validated_bos(brw);
 
@@ -278,6 +345,14 @@ void brw_validate_state( struct brw_context *brw )
 
    brw_add_validated_bo(brw, intel->batch->buf);
 
+   if (IS_GEN6(intel->intelScreen->deviceID)) {
+      atoms = gen6_atoms;
+      num_atoms = ARRAY_SIZE(gen6_atoms);
+   } else {
+      atoms = gen4_atoms;
+      num_atoms = ARRAY_SIZE(gen4_atoms);
+   }
+
    if (brw->emit_state_always) {
       state->mesa |= ~0;
       state->brw |= ~0;
@@ -305,7 +380,7 @@ void brw_validate_state( struct brw_context *brw )
    brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */
 
    /* do prepare stage for all atoms */
-   for (i = 0; i < Elements(atoms); i++) {
+   for (i = 0; i < num_atoms; i++) {
       const struct brw_tracked_state *atom = atoms[i];
 
       if (brw->intel.Fallback)
@@ -337,9 +412,20 @@ void brw_validate_state( struct brw_context *brw )
 
 void brw_upload_state(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_state_flags *state = &brw->state.dirty;
    int i;
    static int dirty_count = 0;
+   const struct brw_tracked_state **atoms;
+   int num_atoms;
+
+   if (IS_GEN6(intel->intelScreen->deviceID)) {
+      atoms = gen6_atoms;
+      num_atoms = ARRAY_SIZE(gen6_atoms);
+   } else {
+      atoms = gen4_atoms;
+      num_atoms = ARRAY_SIZE(gen4_atoms);
+   }
 
    brw_clear_validated_bos(brw);
 
@@ -352,7 +438,7 @@ void brw_upload_state(struct brw_context *brw)
       memset(&examined, 0, sizeof(examined));
       prev = *state;
 
-      for (i = 0; i < Elements(atoms); i++) {	 
+      for (i = 0; i < num_atoms; i++) {
 	 const struct brw_tracked_state *atom = atoms[i];
 	 struct brw_state_flags generated;
 
@@ -381,7 +467,7 @@ void brw_upload_state(struct brw_context *brw)
       }
    }
    else {
-      for (i = 0; i < Elements(atoms); i++) {	 
+      for (i = 0; i < num_atoms; i++) {
 	 const struct brw_tracked_state *atom = atoms[i];
 
 	 if (brw->intel.Fallback)
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 66d4127271..3c2adfc87d 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -658,7 +658,105 @@ struct brw_clip_unit_state
    GLfloat viewport_ymax;  
 };
 
+struct gen6_blend_state
+{
+   struct {
+      GLuint dest_blend_factor:5;
+      GLuint source_blend_factor:5;
+      GLuint pad3:1;
+      GLuint blend_func:3;
+      GLuint pad2:1;
+      GLuint ia_dest_blend_factor:5;
+      GLuint ia_source_blend_factor:5;
+      GLuint pad1:1;
+      GLuint ia_blend_func:3;
+      GLuint pad0:1;
+      GLuint ia_blend_enable:1;
+      GLuint blend_enable:1;
+   } blend0;
+
+   struct {
+      GLuint post_blend_clamp_enable:1;
+      GLuint pre_blend_clamp_enable:1;
+      GLuint clamp_range:2;
+      GLuint pad0:4;
+      GLuint x_dither_offset:2;
+      GLuint y_dither_offset:2;
+      GLuint dither_enable:1;
+      GLuint alpha_test_func:3;
+      GLuint alpha_test_enable:1;
+      GLuint pad1:1;
+      GLuint logic_op_func:4;
+      GLuint logic_op_enable:1;
+      GLuint pad2:1;
+      GLuint write_disable_b:1;
+      GLuint write_disable_g:1;
+      GLuint write_disable_r:1;
+      GLuint write_disable_a:1;
+      GLuint pad3:1;
+      GLuint alpha_to_coverage_dither:1;
+      GLuint alpha_to_one:1;
+      GLuint alpha_to_coverage:1;
+   } blend1;
+};
+
+struct gen6_color_calc_state
+{
+   struct {
+      GLuint alpha_test_format:1;
+      GLuint pad0:14;
+      GLuint round_disable:1;
+      GLuint bf_stencil_ref:8;
+      GLuint stencil_ref:8;
+   } cc0;
 
+   union {
+      GLfloat alpha_ref_f;
+      struct {
+	 GLuint ui:8;
+	 GLuint pad0:24;
+      } alpha_ref_fi;
+   } cc1;
+
+   GLfloat constant_r;
+   GLfloat constant_g;
+   GLfloat constant_b;
+   GLfloat constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+   struct {
+      GLuint pad0:3;
+      GLuint bf_stencil_pass_depth_pass_op:3;
+      GLuint bf_stencil_pass_depth_fail_op:3;
+      GLuint bf_stencil_fail_op:3;
+      GLuint bf_stencil_func:3;
+      GLuint bf_stencil_enable:1;
+      GLuint pad1:2;
+      GLuint stencil_write_enable:1;
+      GLuint stencil_pass_depth_pass_op:3;
+      GLuint stencil_pass_depth_fail_op:3;
+      GLuint stencil_fail_op:3;
+      GLuint stencil_func:3;
+      GLuint stencil_enable:1;
+   } ds0;
+
+   struct {
+      GLuint bf_stencil_write_mask:8;
+      GLuint bf_stencil_test_mask:8;
+      GLuint stencil_write_mask:8;
+      GLuint stencil_test_mask:8;
+   } ds1;
+
+   struct {
+      GLuint pad0:25;
+      GLuint depth_write_enable:1;
+      GLuint depth_test_func:3;
+      GLuint pad1:1;
+      GLuint depth_test_enable:1;
+   } ds2;
+};
 
 struct brw_cc_unit_state
 {
@@ -752,8 +850,6 @@ struct brw_cc_unit_state
    } cc7;
 };
 
-
-
 struct brw_sf_unit_state
 {
    struct thread0 thread0;
@@ -813,6 +909,11 @@ struct brw_sf_unit_state
 
 };
 
+struct gen6_scissor_state
+{
+   GLuint ymin, xmin;
+   GLuint ymax, xmax;
+};
 
 struct brw_gs_unit_state
 {
@@ -1043,6 +1144,15 @@ struct brw_sf_viewport
    } scissor;
 };
 
+struct gen6_sf_viewport {
+   GLfloat m00;
+   GLfloat m11;
+   GLfloat m22;
+   GLfloat m30;
+   GLfloat m31;
+   GLfloat m32;
+};
+
 /* Documented in the subsystem/shared-functions/sampler chapter...
  */
 struct brw_surface_state
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 88327d9927..d16e916832 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -180,10 +180,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    c->first_output = reg;
    c->first_overflow_output = 0;
 
-   if (intel->is_ironlake)
-       mrf = 8;
+   if (intel->gen >= 6)
+      mrf = 6;
+   else if (intel->is_ironlake)
+      mrf = 8;
    else
-       mrf = 4;
+      mrf = 4;
 
    for (i = 0; i < VERT_RESULT_MAX; i++) {
       if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
@@ -279,10 +281,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     */
    attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
 
-   if (intel->is_ironlake)
-       c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+   if (intel->gen >= 6)
+      c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8;
+   else if (intel->is_ironlake)
+      c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
    else
-       c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+      c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
 
    c->prog_data.total_grf = reg;
 
@@ -380,9 +384,8 @@ static void emit_sop( struct brw_vs_compile *c,
 {
    struct brw_compile *p = &c->func;
 
-   brw_MOV(p, dst, brw_imm_f(0.0f));
-   brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
-   brw_MOV(p, dst, brw_imm_f(1.0f));
+   brw_CMP(p, brw_null_reg(), cond, arg1, arg0);
+   brw_SEL(p, dst, brw_null_reg(), brw_imm_f(1.0f));
    brw_set_predicate_control_flag_value(p, 0xff);
 }
 
@@ -479,9 +482,11 @@ static void emit_math1( struct brw_vs_compile *c,
     * whether that turns out to be a simulator bug or not:
     */
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg tmp = dst;
-   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
-			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   GLboolean need_tmp = (intel->gen < 6 &&
+			 (dst.dw1.bits.writemask != 0xf ||
+			  dst.file != BRW_GENERAL_REGISTER_FILE));
 
    if (need_tmp) 
       tmp = get_tmp(c);
@@ -510,9 +515,11 @@ static void emit_math2( struct brw_vs_compile *c,
 			GLuint precision)
 {
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg tmp = dst;
-   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
-			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   GLboolean need_tmp = (intel->gen < 6 &&
+			 (dst.dw1.bits.writemask != 0xf ||
+			  dst.file != BRW_GENERAL_REGISTER_FILE));
 
    if (need_tmp) 
       tmp = get_tmp(c);
@@ -1191,7 +1198,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
    struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
    struct brw_reg ndc;
    int eot;
-   GLuint len_vertext_header = 2;
+   GLuint len_vertex_header = 2;
 
    if (c->key.copy_edgeflag) {
       brw_MOV(p, 
@@ -1199,12 +1206,14 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 	      get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
    }
 
-   /* Build ndc coords */
-   ndc = get_tmp(c);
-   /* ndc = 1.0 / pos.w */
-   emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
-   /* ndc.xyz = pos * ndc */
-   brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+   if (intel->gen < 6) {
+      /* Build ndc coords */
+      ndc = get_tmp(c);
+      /* ndc = 1.0 / pos.w */
+      emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+      /* ndc.xyz = pos * ndc */
+      brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+   }
 
    /* Update the header for point size, user clipping flags, and -ve rhw
     * workaround.
@@ -1267,21 +1276,41 @@ static void emit_vertex_write( struct brw_vs_compile *c)
     * of zeros followed by two sets of NDC coordinates:
     */
    brw_set_access_mode(p, BRW_ALIGN_1);
-   brw_MOV(p, offset(m0, 2), ndc);
-
-   if (intel->is_ironlake) {
-       /* There are 20 DWs (D0-D19) in VUE vertex header on Ironlake */
-       brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
-       /* m4, m5 contain the distances from vertex to the user clip planeXXX. 
-        * Seems it is useless for us.
-        * m6 is used for aligning, so that the remainder of vertex element is 
-        * reg-aligned.
-        */
-       brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */
-       len_vertext_header = 6;
+
+   if (intel->gen >= 6) {
+      /* There are 16 DWs (D0-D15) in VUE header on Sandybridge:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the 4D space position
+       * dword 8-15 (m3,m4) of the vertex header is the user clip distance.
+       * m5 is the first vertex data we fill, which is the vertex position.
+       */
+      brw_MOV(p, offset(m0, 2), pos);
+      brw_MOV(p, offset(m0, 5), pos);
+      len_vertex_header = 4;
+   } else if (intel->is_ironlake) {
+      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the ndc position (set above)
+       * dword 8-11 (m3) of the vertex header is the 4D space position
+       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+       * m6 is a pad so that the vertex element data is aligned
+       * m7 is the first vertex data we fill, which is the vertex position.
+       */
+      brw_MOV(p, offset(m0, 2), ndc);
+      brw_MOV(p, offset(m0, 3), pos);
+      brw_MOV(p, offset(m0, 7), pos);
+      len_vertex_header = 6;
    } else {
-       brw_MOV(p, offset(m0, 3), pos);
-       len_vertext_header = 2;
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 (m1) is indices, point width, clip flags.
+       * dword 4-7 (m2) is ndc position (set above)
+       *
+       * dword 8-11 (m3) is the first vertex data, which we always have be the
+       * vertex position.
+       */
+      brw_MOV(p, offset(m0, 2), ndc);
+      brw_MOV(p, offset(m0, 3), pos);
+      len_vertex_header = 2;
    }
 
    eot = (c->first_overflow_output == 0);
@@ -1292,7 +1321,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 		 c->r0,		/* src */
 		 0,		/* allocate */
 		 1,		/* used */
-		 MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
+		 MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */
 		 0,		/* response len */
 		 eot, 		/* eot */
 		 eot, 		/* writes complete */
@@ -1687,11 +1716,13 @@ void brw_vs_emit(struct brw_vs_compile *c )
             /* patch all the BREAK/CONT instructions from last BEGINLOOP */
             while (inst0 > loop_inst[loop_depth]) {
                inst0--;
-               if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+		   inst0->bits3.if_else.jump_count == 0) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
                   inst0->bits3.if_else.pop_count = 0;
                }
-               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			inst0->bits3.if_else.jump_count == 0) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
                   inst0->bits3.if_else.pop_count = 0;
                }
@@ -1793,6 +1824,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
 
    post_vs_emit(c, end_inst, last_inst);
 
+   brw_optimize(p);
+
    if (INTEL_DEBUG & DEBUG_VS) {
       int i;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index ead623fc0e..4007b5a15c 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -67,13 +67,13 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
     */
    _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters);
 
-   intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE);
+   drm_intel_gem_bo_map_gtt(const_buffer);
    for (i = 0; i < params->NumParameters; i++) {
       memcpy(const_buffer->virtual + i * 4 * sizeof(float),
 	     params->ParameterValues[i],
 	     4 * sizeof(float));
    }
-   intel_bo_unmap_gtt_preferred(intel, const_buffer);
+   drm_intel_gem_bo_unmap_gtt(const_buffer);
 
    return const_buffer;
 }
@@ -104,7 +104,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
    /* If there's no constant buffer, then no surface BO is needed to point at
     * it.
     */
-   if (vp->const_buffer == 0) {
+   if (vp->const_buffer == NULL) {
       drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
       brw->vs.surf_bo[surf] = NULL;
       return;
@@ -132,7 +132,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
    brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
                                             BRW_SS_SURFACE,
                                             &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
+                                            &key.bo, 1,
                                             NULL);
    if (brw->vs.surf_bo[surf] == NULL) {
       brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 0b0be02dd2..96a44bfbec 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -102,6 +102,9 @@ static void brw_destroy_context( struct intel_context *intel )
    dri_bo_release(&brw->cc.prog_bo);
    dri_bo_release(&brw->cc.state_bo);
    dri_bo_release(&brw->cc.vp_bo);
+   dri_bo_release(&brw->cc.blend_state_bo);
+   dri_bo_release(&brw->cc.depth_stencil_state_bo);
+   dri_bo_release(&brw->cc.color_calc_state_bo);
 }
 
 
@@ -141,7 +144,7 @@ static void brw_finish_batch(struct intel_context *intel)
    brw_emit_query_end(brw);
 
    if (brw->curbe.curbe_bo) {
-      intel_bo_unmap_gtt_preferred(intel, brw->curbe.curbe_bo);
+      drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
       drm_intel_bo_unreference(brw->curbe.curbe_bo);
       brw->curbe.curbe_bo = NULL;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 88d84ee82f..47b764d24d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -328,6 +328,12 @@ void emit_cinterp(struct brw_compile *p,
 		  const struct brw_reg *dst,
 		  GLuint mask,
 		  const struct brw_reg *arg0);
+void emit_cmp(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1,
+	      const struct brw_reg *arg2);
 void emit_ddxy(struct brw_compile *p,
 	       const struct brw_reg *dst,
 	       GLuint mask,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 9315bca315..05e464d4b6 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -34,6 +34,23 @@
 #include "brw_context.h"
 #include "brw_wm.h"
 
+static GLboolean can_do_pln(struct intel_context *intel,
+			    const struct brw_reg *deltas)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   if (!brw->has_pln)
+      return GL_FALSE;
+
+   if (deltas[1].nr != deltas[0].nr + 1)
+      return GL_FALSE;
+
+   if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
 /* Not quite sure how correct this is - need to understand horiz
  * vs. vertical strides a little better.
  */
@@ -45,7 +62,13 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
 }
 
 
-/* Payload R0:
+/**
+ * Computes the screen-space x,y position of the pixels.
+ *
+ * This will be used by emit_delta_xy() or emit_wpos_xy() for
+ * interpolation of attributes..
+ *
+ * Payload R0:
  *
  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  *         corresponding to each of the 16 execution channels.
@@ -60,7 +83,6 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
  * R1.7 -- ?
  * R1.8 -- ?
  */
-
 void emit_pixel_xy(struct brw_wm_compile *c,
 		   const struct brw_reg *dst,
 		   GLuint mask)
@@ -100,7 +122,14 @@ void emit_pixel_xy(struct brw_wm_compile *c,
    brw_pop_insn_state(p);
 }
 
-
+/**
+ * Computes the screen-space x,y distance of the pixels from the start
+ * vertex.
+ *
+ * This will be used in linterp or pinterp with the start vertex value
+ * and the Cx, Cy, and C0 coefficients passed in from the setup engine
+ * to produce interpolated attribute values.
+ */
 void emit_delta_xy(struct brw_compile *p,
 		   const struct brw_reg *dst,
 		   GLuint mask,
@@ -108,25 +137,27 @@ void emit_delta_xy(struct brw_compile *p,
 {
    struct brw_reg r1 = brw_vec1_grf(1, 0);
 
-   /* Calc delta X,Y by subtracting origin in r1 from the pixel
-    * centers.
-    */
-   if (mask & WRITEMASK_X) {
-      brw_ADD(p,
-	      dst[0],
-	      retype(arg0[0], BRW_REGISTER_TYPE_UW),
-	      negate(r1));
-   }
+   if (mask == 0)
+      return;
 
-   if (mask & WRITEMASK_Y) {
-      brw_ADD(p,
-	      dst[1],
-	      retype(arg0[1], BRW_REGISTER_TYPE_UW),
-	      negate(suboffset(r1,1)));
+   assert(mask == WRITEMASK_XY);
 
-   }
+   /* Calc delta X,Y by subtracting origin in r1 from the pixel
+    * centers produced by emit_pixel_xy().
+    */
+   brw_ADD(p,
+	   dst[0],
+	   retype(arg0[0], BRW_REGISTER_TYPE_UW),
+	   negate(r1));
+   brw_ADD(p,
+	   dst[1],
+	   retype(arg0[1], BRW_REGISTER_TYPE_UW),
+	   negate(suboffset(r1,1)));
 }
 
+/**
+ * Computes the pixel offset from the window origin for gl_FragCoord().
+ */
 void emit_wpos_xy(struct brw_wm_compile *c,
 		  const struct brw_reg *dst,
 		  GLuint mask,
@@ -134,9 +165,6 @@ void emit_wpos_xy(struct brw_wm_compile *c,
 {
    struct brw_compile *p = &c->func;
 
-   /* Calculate the pixel offset from window bottom left into destination
-    * X and Y channels.
-    */
    if (mask & WRITEMASK_X) {
       if (c->fp->program.PixelCenterInteger) {
 	 /* X' = X */
@@ -186,6 +214,7 @@ void emit_pixel_w(struct brw_wm_compile *c,
 		  const struct brw_reg *deltas)
 {
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
 
    /* Don't need this if all you are doing is interpolating color, for
     * instance.
@@ -196,8 +225,12 @@ void emit_pixel_w(struct brw_wm_compile *c,
       /* Calc 1/w - just linterp wpos[3] optimized by putting the
        * result straight into a message reg.
        */
-      brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
-      brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+      if (can_do_pln(intel, deltas)) {
+	 brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
+      } else {
+	 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+	 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+      }
 
       /* Calc w */
       if (c->dispatch_width == 16) {
@@ -224,6 +257,7 @@ void emit_linterp(struct brw_compile *p,
 		  const struct brw_reg *arg0,
 		  const struct brw_reg *deltas)
 {
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg interp[4];
    GLuint nr = arg0[0].nr;
    GLuint i;
@@ -235,8 +269,12 @@ void emit_linterp(struct brw_compile *p,
 
    for (i = 0; i < 4; i++) {
       if (mask & (1<<i)) {
-	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
-	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+	 if (can_do_pln(intel, deltas)) {
+	    brw_PLN(p, dst[i], interp[i], deltas[0]);
+	 } else {
+	    brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+	    brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+	 }
       }
    }
 }
@@ -249,6 +287,7 @@ void emit_pinterp(struct brw_compile *p,
 		  const struct brw_reg *deltas,
 		  const struct brw_reg *w)
 {
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg interp[4];
    GLuint nr = arg0[0].nr;
    GLuint i;
@@ -260,8 +299,12 @@ void emit_pinterp(struct brw_compile *p,
 
    for (i = 0; i < 4; i++) {
       if (mask & (1<<i)) {
-	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
-	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+	 if (can_do_pln(intel, deltas)) {
+	    brw_PLN(p, dst[i], interp[i], deltas[0]);
+	 } else {
+	    brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+	    brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+	 }
       }
    }
    for (i = 0; i < 4; i++) {
@@ -502,11 +545,8 @@ void emit_sop(struct brw_compile *p,
    for (i = 0; i < 4; i++) {
       if (mask & (1<<i)) {	
 	 brw_push_insn_state(p);
-	 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
-	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-	 brw_MOV(p, dst[i], brw_imm_f(0));
-	 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
-	 brw_MOV(p, dst[i], brw_imm_f(1.0));
+	 brw_CMP(p, brw_null_reg(), cond, arg1[i], arg0[i]);
+	 brw_SEL(p, dst[i], brw_null_reg(), brw_imm_f(1.0));
 	 brw_pop_insn_state(p);
       }
    }
@@ -566,12 +606,12 @@ static void emit_sne( struct brw_compile *p,
    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 }
 
-static void emit_cmp( struct brw_compile *p, 
-		      const struct brw_reg *dst,
-		      GLuint mask,
-		      const struct brw_reg *arg0,
-		      const struct brw_reg *arg1,
-		      const struct brw_reg *arg2 )
+void emit_cmp(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1,
+	      const struct brw_reg *arg2)
 {
    GLuint i;
 
@@ -601,14 +641,10 @@ void emit_max(struct brw_compile *p,
 
    for (i = 0; i < 4; i++) {
       if (mask & (1<<i)) {	
-	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
-	 brw_MOV(p, dst[i], arg0[i]);
-	 brw_set_saturate(p, 0);
-
-	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
 
 	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
-	 brw_MOV(p, dst[i], arg1[i]);
+	 brw_SEL(p, dst[i], arg0[i], arg1[i]);
 	 brw_set_saturate(p, 0);
 	 brw_set_predicate_control_flag_value(p, 0xff);
       }
@@ -625,14 +661,10 @@ void emit_min(struct brw_compile *p,
 
    for (i = 0; i < 4; i++) {
       if (mask & (1<<i)) {	
-	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
-	 brw_MOV(p, dst[i], arg1[i]);
-	 brw_set_saturate(p, 0);
-
 	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 
 	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
-	 brw_MOV(p, dst[i], arg0[i]);
+	 brw_SEL(p, dst[i], arg0[i], arg1[i]);
 	 brw_set_saturate(p, 0);
 	 brw_set_predicate_control_flag_value(p, 0xff);
       }
@@ -1086,11 +1118,19 @@ static void emit_kil( struct brw_wm_compile *c,
 {
    struct brw_compile *p = &c->func;
    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-   GLuint i;
-   
-   /* XXX - usually won't need 4 compares!
-    */
+   GLuint i, j;
+
    for (i = 0; i < 4; i++) {
+      /* Check if we've already done the comparison for this reg
+       * -- common when someone does KIL TEMP.wwww.
+       */
+      for (j = 0; j < i; j++) {
+	 if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0)
+	    break;
+      }
+      if (j != i)
+	 continue;
+
       brw_push_insn_state(p);
       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));   
       brw_set_predicate_control_flag_value(p, 0xff);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 562608e2ec..0b66cc6c9f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -289,6 +289,7 @@ reclaim_temps(struct brw_wm_compile *c)
  */
 static void prealloc_reg(struct brw_wm_compile *c)
 {
+    struct intel_context *intel = &c->func.brw->intel;
     int i, j;
     struct brw_reg reg;
     int urb_read_length = 0;
@@ -413,6 +414,43 @@ static void prealloc_reg(struct brw_wm_compile *c)
 	}
     }
 
+    for (i = 0; i < c->nr_fp_insns; i++) {
+	const struct prog_instruction *inst = &c->prog_instructions[i];
+
+	switch (inst->Opcode) {
+	case WM_DELTAXY:
+	    /* Allocate WM_DELTAXY destination on G45/GM45 to an
+	     * even-numbered GRF if possible so that we can use the PLN
+	     * instruction.
+	     */
+	    if (inst->DstReg.WriteMask == WRITEMASK_XY &&
+		!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
+		!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
+		(IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
+		int grf;
+
+		for (grf = c->first_free_grf & ~1;
+		     grf < BRW_WM_MAX_GRF;
+		     grf += 2)
+		{
+		    if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
+			c->used_grf[grf] = GL_TRUE;
+			c->used_grf[grf + 1] = GL_TRUE;
+			c->first_free_grf = grf + 2;  /* a guess */
+
+			set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
+				brw_vec8_grf(grf, 0));
+			set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
+				brw_vec8_grf(grf + 1, 0));
+			break;
+		    }
+		}
+	    }
+	default:
+	    break;
+	}
+    }
+
     /* An instruction may reference up to three constants.
      * They'll be found in these registers.
      * XXX alloc these on demand!
@@ -614,112 +652,6 @@ static void invoke_subroutine( struct brw_wm_compile *c,
     }
 }
 
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written.  Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias3(struct brw_wm_compile *c,
-	 void (*func)(struct brw_compile *c,
-		      const struct brw_reg *dst,
-		      GLuint mask,
-		      const struct brw_reg *arg0,
-		      const struct brw_reg *arg1,
-		      const struct brw_reg *arg2),
-	 const struct brw_reg *dst,
-	 GLuint mask,
-	 const struct brw_reg *arg0,
-	 const struct brw_reg *arg1,
-	 const struct brw_reg *arg2)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4];
-    int i, j;
-    int mark = mark_tmps(c);
-
-    for (j = 0; j < 4; j++) {
-	tmp_arg0[j] = arg0[j];
-	tmp_arg1[j] = arg1[j];
-	tmp_arg2[j] = arg2[j];
-    }
-
-    for (i = 0; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    for (j = 0; j < 4; j++) {
-		if (arg0[j].file == dst[i].file &&
-		    dst[i].nr == arg0[j].nr) {
-		    tmp_arg0[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg0[j], arg0[j]);
-		}
-		if (arg1[j].file == dst[i].file &&
-		    dst[i].nr == arg1[j].nr) {
-		    tmp_arg1[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg1[j], arg1[j]);
-		}
-		if (arg2[j].file == dst[i].file &&
-		    dst[i].nr == arg2[j].nr) {
-		    tmp_arg2[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg2[j], arg2[j]);
-		}
-	    }
-	}
-    }
-
-    func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2);
-
-    release_tmps(c, mark);
-}
-
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written.  Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias2(struct brw_wm_compile *c,
-	 void (*func)(struct brw_compile *c,
-		      const struct brw_reg *dst,
-		      GLuint mask,
-		      const struct brw_reg *arg0,
-		      const struct brw_reg *arg1),
-	 const struct brw_reg *dst,
-	 GLuint mask,
-	 const struct brw_reg *arg0,
-	 const struct brw_reg *arg1)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg tmp_arg0[4], tmp_arg1[4];
-    int i, j;
-    int mark = mark_tmps(c);
-
-    for (j = 0; j < 4; j++) {
-	tmp_arg0[j] = arg0[j];
-	tmp_arg1[j] = arg1[j];
-    }
-
-    for (i = 0; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    for (j = 0; j < 4; j++) {
-		if (arg0[j].file == dst[i].file &&
-		    dst[i].nr == arg0[j].nr) {
-		    tmp_arg0[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg0[j], arg0[j]);
-		}
-		if (arg1[j].file == dst[i].file &&
-		    dst[i].nr == arg1[j].nr) {
-		    tmp_arg1[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg1[j], arg1[j]);
-		}
-	    }
-	}
-    }
-
-    func(p, dst, mask, tmp_arg0, tmp_arg1);
-
-    release_tmps(c, mark);
-}
-
 static void emit_arl(struct brw_wm_compile *c,
                      const struct prog_instruction *inst)
 {
@@ -1813,14 +1745,29 @@ static void
 get_argument_regs(struct brw_wm_compile *c,
 		  const struct prog_instruction *inst,
 		  int index,
+		  struct brw_reg *dst,
 		  struct brw_reg *regs,
 		  int mask)
 {
-    int i;
+    struct brw_compile *p = &c->func;
+    int i, j;
 
     for (i = 0; i < 4; i++) {
-	if (mask & (1 << i))
+	if (mask & (1 << i)) {
 	    regs[i] = get_src_reg(c, inst, index, i);
+
+	    /* Unalias destination registers from our sources. */
+	    if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
+	       for (j = 0; j < 4; j++) {
+		   if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
+		       struct brw_reg tmp = alloc_tmp(c);
+		       brw_MOV(p, tmp, regs[i]);
+		       regs[i] = tmp;
+		       break;
+		   }
+	       }
+	    }
+	}
     }
 }
 
@@ -1845,6 +1792,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	int dst_flags;
 	struct brw_reg args[3][4], dst[4];
 	int j;
+	int mark = mark_tmps( c );
 
         c->cur_inst = i;
 
@@ -1866,7 +1814,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	   }
 	}
 	for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
-	    get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW);
+	    get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
 
 	dst_flags = inst->DstReg.WriteMask;
 	if (inst->SaturateMode == SATURATE_ZERO_ONE)
@@ -1920,8 +1868,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 		emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
 		break;
 	    case OPCODE_LRP:
-		unalias3(c, emit_lrp,
-			 dst, dst_flags, args[0], args[1], args[2]);
+		emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
 		break;
 	    case OPCODE_TRUNC:
 		emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
@@ -1960,11 +1907,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	    case OPCODE_LG2:
 		emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
 		break;
+	    case OPCODE_CMP:
+		emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+		break;
 	    case OPCODE_MIN:	
-		unalias2(c, emit_min, dst, dst_flags, args[0], args[1]);
+		emit_min(p, dst, dst_flags, args[0], args[1]);
 		break;
 	    case OPCODE_MAX:	
-		unalias2(c, emit_max, dst, dst_flags, args[0], args[1]);
+		emit_max(p, dst, dst_flags, args[0], args[1]);
 		break;
 	    case OPCODE_DDX:
 	    case OPCODE_DDY:
@@ -2103,11 +2053,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                   /* patch all the BREAK/CONT instructions from last BGNLOOP */
                   while (inst0 > loop_inst[loop_depth]) {
                      inst0--;
-                     if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+                     if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+			 inst0->bits3.if_else.jump_count == 0) {
 			inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
 			inst0->bits3.if_else.pop_count = 0;
                      }
-                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			      inst0->bits3.if_else.jump_count == 0) {
                         inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
                         inst0->bits3.if_else.pop_count = 0;
                      }
@@ -2115,10 +2067,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                }
                break;
 	    default:
-		printf("unsupported IR in fragment shader %d\n",
-			inst->Opcode);
+		printf("unsupported opcode %d (%s) in fragment shader\n",
+		       inst->Opcode, inst->Opcode < MAX_OPCODE ?
+		       _mesa_opcode_string(inst->Opcode) : "unknown");
 	}
 
+	/* Release temporaries containing any unaliased source regs. */
+	release_tmps( c, mark );
+
 	if (inst->CondUpdate)
 	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
 	else
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index c232cd2791..d7650af3d9 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -89,7 +89,6 @@ struct wm_sampler_key {
       float max_aniso;
       GLenum minfilter, magfilter;
       GLenum comparemode, comparefunc;
-      dri_bo *sdc_bo;
 
       /** If target is cubemap, take context setting.
        */
@@ -230,7 +229,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
    GLcontext *ctx = &brw->intel.ctx;
    int unit;
 
-   memset(key, 0, sizeof(*key));
+   key->sampler_count = 0;
 
    for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
       if (ctx->Texture.Unit[unit]._ReallyEnabled) {
@@ -241,6 +240,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 	 struct gl_texture_image *firstImage =
 	    texObj->Image[0][intelObj->firstLevel];
 
+	 memset(entry, 0, sizeof(*entry));
+
          entry->tex_target = texObj->Target;
 
 	 entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
@@ -289,7 +290,7 @@ static void upload_wm_samplers( struct brw_context *brw )
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct wm_sampler_key key;
-   int i;
+   int i, sampler_key_size;
 
    brw_wm_sampler_populate_key(brw, &key);
 
@@ -303,8 +304,11 @@ static void upload_wm_samplers( struct brw_context *brw )
    if (brw->wm.sampler_count == 0)
       return;
 
+   /* Only include the populated portion of the key in the search. */
+   sampler_key_size = offsetof(struct wm_sampler_key,
+			       sampler[key.sampler_count]);
    brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
-					 &key, sizeof(key),
+					 &key, sampler_key_size,
 					 brw->wm.sdc_bo, key.sampler_count,
 					 NULL);
 
@@ -324,7 +328,7 @@ static void upload_wm_samplers( struct brw_context *brw )
       }
 
       brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
-					    &key, sizeof(key),
+					    &key, sampler_key_size,
 					    brw->wm.sdc_bo, key.sampler_count,
 					    &sampler, sizeof(sampler));
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 1db438ae7b..ce0bf0b97d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -207,33 +207,14 @@ brw_create_texture_surface( struct brw_context *brw,
 
    surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
    surf.ss0.surface_type = translate_tex_target(key->target);
-   if (key->bo) {
-      surf.ss0.surface_format = translate_tex_format(key->format,
-						     key->internal_format,
-						     key->depthmode);
-   }
-   else {
-      switch (key->depth) {
-      case 32:
-         surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-         break;
-      default:
-      case 24:
-         surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
-         break;
-      case 16:
-         surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
-         break;
-      }
-   }
+   surf.ss0.surface_format = translate_tex_format(key->format,
+						  key->internal_format,
+						  key->depthmode);
 
    /* This is ok for all textures with channel width 8bit or less:
     */
 /*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
-   if (key->bo)
-      surf.ss1.base_addr = key->bo->offset; /* reloc */
-   else
-      surf.ss1.base_addr = key->offset;
+   surf.ss1.base_addr = key->bo->offset; /* reloc */
 
    surf.ss2.mip_count = key->last_level - key->first_level;
    surf.ss2.width = key->width - 1;
@@ -255,17 +236,14 @@ brw_create_texture_surface( struct brw_context *brw,
 
    bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
 			 key, sizeof(*key),
-			 &key->bo, key->bo ? 1 : 0,
+			 &key->bo, 1,
 			 &surf, sizeof(surf));
 
-   if (key->bo) {
-      /* Emit relocation to surface contents */
-      dri_bo_emit_reloc(bo,
-			I915_GEM_DOMAIN_SAMPLER, 0,
-			0,
-			offsetof(struct brw_surface_state, ss1),
-			key->bo);
-   }
+   /* Emit relocation to surface contents */
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1),
+			   key->bo, 0,
+			   I915_GEM_DOMAIN_SAMPLER, 0);
+
    return bo;
 }
 
@@ -281,19 +259,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
 
    memset(&key, 0, sizeof(key));
 
-   if (intelObj->imageOverride) {
-      key.pitch = intelObj->pitchOverride / intelObj->mt->cpp;
-      key.depth = intelObj->depthOverride;
-      key.bo = NULL;
-      key.offset = intelObj->textureOffset;
-   } else {
-      key.format = firstImage->TexFormat;
-      key.internal_format = firstImage->InternalFormat;
-      key.pitch = intelObj->mt->pitch;
-      key.depth = firstImage->Depth;
-      key.bo = intelObj->mt->region->buffer;
-      key.offset = 0;
-   }
+   key.format = firstImage->TexFormat;
+   key.internal_format = firstImage->InternalFormat;
+   key.pitch = intelObj->mt->pitch;
+   key.depth = firstImage->Depth;
+   key.bo = intelObj->mt->region->buffer;
+   key.offset = 0;
 
    key.target = tObj->Target;
    key.depthmode = tObj->DepthMode;
@@ -308,7 +279,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
    brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
                                             BRW_SS_SURFACE,
                                             &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
+                                            &key.bo, 1,
                                             NULL);
    if (brw->wm.surf_bo[surf] == NULL) {
       brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
@@ -336,10 +307,7 @@ brw_create_constant_surface( struct brw_context *brw,
    surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 
    assert(key->bo);
-   if (key->bo)
-      surf.ss1.base_addr = key->bo->offset; /* reloc */
-   else
-      surf.ss1.base_addr = key->offset;
+   surf.ss1.base_addr = key->bo->offset; /* reloc */
 
    surf.ss2.width = w & 0x7f;            /* bits 6:0 of size or width */
    surf.ss2.height = (w >> 7) & 0x1fff;  /* bits 19:7 of size or width */
@@ -349,20 +317,16 @@ brw_create_constant_surface( struct brw_context *brw,
  
    bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
 			 key, sizeof(*key),
-			 &key->bo, key->bo ? 1 : 0,
+			 &key->bo, 1,
 			 &surf, sizeof(surf));
 
-   if (key->bo) {
-      /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
-       * bspec ("Data Cache") says that the data cache does not exist as
-       * a separate cache and is just the sampler cache.
-       */
-      dri_bo_emit_reloc(bo,
-			I915_GEM_DOMAIN_SAMPLER, 0,
-			0,
-			offsetof(struct brw_surface_state, ss1),
-			key->bo);
-   }
+   /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
+    * bspec ("Data Cache") says that the data cache does not exist as
+    * a separate cache and is just the sampler cache.
+    */
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1),
+			   key->bo, 0,
+			   I915_GEM_DOMAIN_SAMPLER, 0);
 
    return bo;
 }
@@ -420,7 +384,7 @@ brw_update_wm_constant_surface( GLcontext *ctx,
    /* If there's no constant buffer, then no surface BO is needed to point at
     * it.
     */
-   if (fp->const_buffer == 0) {
+   if (fp->const_buffer == NULL) {
       drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
       brw->wm.surf_bo[surf] = NULL;
       return;
@@ -448,7 +412,7 @@ brw_update_wm_constant_surface( GLcontext *ctx,
    brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
                                             BRW_SS_SURFACE,
                                             &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
+                                            &key.bo, 1,
                                             NULL);
    if (brw->wm.surf_bo[surf] == NULL) {
       brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
@@ -509,7 +473,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
 				struct gl_renderbuffer *rb,
 				unsigned int unit)
 {
-   struct intel_context *intel = &brw->intel;;
+   struct intel_context *intel = &brw->intel;
    GLcontext *ctx = &intel->ctx;
    dri_bo *region_bo = NULL;
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
@@ -576,18 +540,21 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
       key.draw_x = 0;
       key.draw_y = 0;
    }
-   /* _NEW_COLOR */
-   memcpy(key.color_mask, ctx->Color.ColorMask[unit],
-	  sizeof(key.color_mask));
 
-   /* As mentioned above, disable writes to the alpha component when the
-    * renderbuffer is XRGB.
-    */
-   if (ctx->DrawBuffer->Visual.alphaBits == 0)
-     key.color_mask[3] = GL_FALSE;
+   if (intel->gen < 6) {
+      /* _NEW_COLOR */
+      memcpy(key.color_mask, ctx->Color.ColorMask[unit],
+	     sizeof(key.color_mask));
 
-   key.color_blend = (!ctx->Color._LogicOpEnabled &&
-		      (ctx->Color.BlendEnabled & (1 << unit)));
+      /* As mentioned above, disable writes to the alpha component when the
+       * renderbuffer is XRGB.
+       */
+      if (ctx->DrawBuffer->Visual.alphaBits == 0)
+	 key.color_mask[3] = GL_FALSE;
+
+      key.color_blend = (!ctx->Color._LogicOpEnabled &&
+			 (ctx->Color.BlendEnabled & (1 << unit)));
+   }
 
    dri_bo_unreference(brw->wm.surf_bo[unit]);
    brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
@@ -639,12 +606,14 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
       brw_set_surface_tiling(&surf, key.tiling);
       surf.ss3.pitch = (key.pitch * key.cpp) - 1;
 
-      /* _NEW_COLOR */
-      surf.ss0.color_blend = key.color_blend;
-      surf.ss0.writedisable_red =   !key.color_mask[0];
-      surf.ss0.writedisable_green = !key.color_mask[1];
-      surf.ss0.writedisable_blue =  !key.color_mask[2];
-      surf.ss0.writedisable_alpha = !key.color_mask[3];
+      if (intel->gen < 6) {
+	 /* _NEW_COLOR */
+	 surf.ss0.color_blend = key.color_blend;
+	 surf.ss0.writedisable_red =   !key.color_mask[0];
+	 surf.ss0.writedisable_green = !key.color_mask[1];
+	 surf.ss0.writedisable_blue =  !key.color_mask[2];
+	 surf.ss0.writedisable_alpha = !key.color_mask[3];
+      }
 
       /* Key size will never match key size for textures, so we're safe. */
       brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
new file mode 100644
index 0000000000..f7acad6912
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "intel_batchbuffer.h"
+#include "main/macros.h"
+
+struct gen6_blend_state_key {
+   GLboolean color_blend, alpha_enabled;
+   GLboolean dither;
+
+   GLenum logic_op;
+
+   GLenum blend_eq_rgb, blend_eq_a;
+   GLenum blend_src_rgb, blend_src_a;
+   GLenum blend_dst_rgb, blend_dst_a;
+
+   GLenum alpha_func;
+};
+
+static void
+blend_state_populate_key(struct brw_context *brw,
+			 struct gen6_blend_state_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_COLOR */
+   if (ctx->Color._LogicOpEnabled)
+      key->logic_op = ctx->Color.LogicOp;
+   else
+      key->logic_op = GL_COPY;
+
+   /* _NEW_COLOR */
+   key->color_blend = ctx->Color.BlendEnabled;
+   if (key->color_blend) {
+      key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
+      key->blend_eq_a = ctx->Color.BlendEquationA;
+      key->blend_src_rgb = ctx->Color.BlendSrcRGB;
+      key->blend_dst_rgb = ctx->Color.BlendDstRGB;
+      key->blend_src_a = ctx->Color.BlendSrcA;
+      key->blend_dst_a = ctx->Color.BlendDstA;
+   }
+
+   /* _NEW_COLOR */
+   key->alpha_enabled = ctx->Color.AlphaEnabled;
+   if (key->alpha_enabled) {
+      key->alpha_func = ctx->Color.AlphaFunc;
+   }
+
+   /* _NEW_COLOR */
+   key->dither = ctx->Color.DitherFlag;
+}
+
+/**
+ * Creates the state cache entry for the given CC unit key.
+ */
+static drm_intel_bo *
+blend_state_create_from_key(struct brw_context *brw,
+			    struct gen6_blend_state_key *key)
+{
+   struct gen6_blend_state blend;
+   drm_intel_bo *bo;
+
+   memset(&blend, 0, sizeof(blend));
+
+   if (key->logic_op != GL_COPY) {
+      blend.blend1.logic_op_enable = 1;
+      blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
+   } else if (key->color_blend) {
+      GLenum eqRGB = key->blend_eq_rgb;
+      GLenum eqA = key->blend_eq_a;
+      GLenum srcRGB = key->blend_src_rgb;
+      GLenum dstRGB = key->blend_dst_rgb;
+      GLenum srcA = key->blend_src_a;
+      GLenum dstA = key->blend_dst_a;
+
+      if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+	 srcRGB = dstRGB = GL_ONE;
+      }
+
+      if (eqA == GL_MIN || eqA == GL_MAX) {
+	 srcA = dstA = GL_ONE;
+      }
+
+      blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+      blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
+      blend.blend0.blend_func = brw_translate_blend_equation(eqRGB);
+
+      blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+      blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
+      blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA);
+
+      blend.blend0.blend_enable = 1;
+      blend.blend0.ia_blend_enable = (srcA != srcRGB ||
+				      dstA != dstRGB ||
+				      eqA != eqRGB);
+   }
+
+   if (key->alpha_enabled) {
+      blend.blend1.alpha_test_enable = 1;
+      blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+
+   }
+
+   if (key->dither) {
+      blend.blend1.dither_enable = 1;
+      blend.blend1.y_dither_offset = 0;
+      blend.blend1.x_dither_offset = 0;
+   }
+
+   bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE,
+			 key, sizeof(*key),
+			 NULL, 0,
+			 &blend, sizeof(blend));
+
+   return bo;
+}
+
+static void
+prepare_blend_state(struct brw_context *brw)
+{
+   struct gen6_blend_state_key key;
+
+   blend_state_populate_key(brw, &key);
+
+   drm_intel_bo_unreference(brw->cc.blend_state_bo);
+   brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE,
+					     &key, sizeof(key),
+					     NULL, 0,
+					     NULL);
+
+   if (brw->cc.blend_state_bo == NULL)
+      brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_blend_state = {
+   .dirty = {
+      .mesa = _NEW_COLOR,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_blend_state,
+};
+
+struct gen6_color_calc_state_key {
+   GLubyte blend_constant_color[4];
+   GLclampf alpha_ref;
+   GLubyte stencil_ref[2];
+};
+
+static void
+color_calc_state_populate_key(struct brw_context *brw,
+			      struct gen6_color_calc_state_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_STENCIL */
+   if (ctx->Stencil._Enabled) {
+      const unsigned back = ctx->Stencil._BackFace;
+
+      key->stencil_ref[0] = ctx->Stencil.Ref[0];
+      if (ctx->Stencil._TestTwoSide)
+	 key->stencil_ref[1] = ctx->Stencil.Ref[back];
+   }
+
+   /* _NEW_COLOR */
+   if (ctx->Color.AlphaEnabled)
+      key->alpha_ref = ctx->Color.AlphaRef;
+
+   key->blend_constant_color[0] = ctx->Color.BlendColor[0];
+   key->blend_constant_color[1] = ctx->Color.BlendColor[1];
+   key->blend_constant_color[2] = ctx->Color.BlendColor[2];
+   key->blend_constant_color[3] = ctx->Color.BlendColor[3];
+}
+
+/**
+ * Creates the state cache entry for the given CC state key.
+ */
+static drm_intel_bo *
+color_calc_state_create_from_key(struct brw_context *brw,
+				 struct gen6_color_calc_state_key *key)
+{
+   struct gen6_color_calc_state cc;
+   drm_intel_bo *bo;
+
+   memset(&cc, 0, sizeof(cc));
+
+   cc.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+   UNCLAMPED_FLOAT_TO_UBYTE(cc.cc1.alpha_ref_fi.ui, key->alpha_ref);
+
+   cc.cc0.stencil_ref = key->stencil_ref[0];
+   cc.cc0.bf_stencil_ref = key->stencil_ref[1];
+
+   cc.constant_r = key->blend_constant_color[0];
+   cc.constant_g = key->blend_constant_color[1];
+   cc.constant_b = key->blend_constant_color[2];
+   cc.constant_a = key->blend_constant_color[3];
+
+   bo = brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE,
+			 key, sizeof(*key),
+			 NULL, 0,
+			 &cc, sizeof(cc));
+
+   return bo;
+}
+
+static void
+prepare_color_calc_state(struct brw_context *brw)
+{
+   struct gen6_color_calc_state_key key;
+
+   color_calc_state_populate_key(brw, &key);
+
+   drm_intel_bo_unreference(brw->cc.state_bo);
+   brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE,
+				       &key, sizeof(key),
+				       NULL, 0,
+				       NULL);
+
+   if (brw->cc.state_bo == NULL)
+      brw->cc.state_bo = color_calc_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_color_calc_state = {
+   .dirty = {
+      .mesa = _NEW_COLOR,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_color_calc_state,
+};
+
+static void upload_cc_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2));
+   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void prepare_cc_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->cc.state_bo);
+   brw_add_validated_bo(brw, brw->cc.blend_state_bo);
+   brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo);
+}
+
+const struct brw_tracked_state gen6_cc_state_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = (CACHE_NEW_BLEND_STATE |
+		CACHE_NEW_COLOR_CALC_STATE |
+		CACHE_NEW_DEPTH_STENCIL_STATE)
+   },
+   .prepare = prepare_cc_state_pointers,
+   .emit = upload_cc_state_pointers,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
new file mode 100644
index 0000000000..06f8145e32
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_clip_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   uint32_t depth_clamp = 0;
+   uint32_t provoking;
+
+   if (!ctx->Transform.DepthClamp)
+      depth_clamp = GEN6_CLIP_Z_TEST;
+
+   if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
+      provoking = 0;
+   } else {
+      provoking =
+	 (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
+	 (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
+	 (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
+   }
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2));
+   OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE);
+   OUT_BATCH(GEN6_CLIP_ENABLE |
+	     GEN6_CLIP_API_OGL |
+	     GEN6_CLIP_MODE_REJECT_ALL | /* XXX: debug: get VS working */
+	     GEN6_CLIP_XY_TEST |
+	     depth_clamp |
+	     provoking);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_clip_state = {
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM,
+      .brw   = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_clip_state,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c
new file mode 100644
index 0000000000..4924f0fd55
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+struct brw_depth_stencil_state_key {
+   GLenum depth_func;
+   GLboolean depth_test, depth_write;
+   GLboolean stencil, stencil_two_side;
+   GLenum stencil_func[2], stencil_fail_op[2];
+   GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
+   GLubyte stencil_write_mask[2], stencil_test_mask[2];
+};
+
+static void
+depth_stencil_state_populate_key(struct brw_context *brw,
+				 struct brw_depth_stencil_state_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const unsigned back = ctx->Stencil._BackFace;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_STENCIL */
+   key->stencil = ctx->Stencil._Enabled;
+   key->stencil_two_side = ctx->Stencil._TestTwoSide;
+
+   if (key->stencil) {
+      key->stencil_func[0] = ctx->Stencil.Function[0];
+      key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
+      key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
+      key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
+      key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
+      key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
+   }
+   if (key->stencil_two_side) {
+      key->stencil_func[1] = ctx->Stencil.Function[back];
+      key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
+      key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
+      key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
+      key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
+      key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
+   }
+
+   key->depth_test = ctx->Depth.Test;
+   if (key->depth_test) {
+      key->depth_func = ctx->Depth.Func;
+      key->depth_write = ctx->Depth.Mask;
+   }
+}
+
+/**
+ * Creates the state cache entry for the given DEPTH_STENCIL_STATE state key.
+ */
+static dri_bo *
+depth_stencil_state_create_from_key(struct brw_context *brw,
+				    struct brw_depth_stencil_state_key *key)
+{
+   struct gen6_depth_stencil_state ds;
+   dri_bo *bo;
+
+   memset(&ds, 0, sizeof(ds));
+
+   /* _NEW_STENCIL */
+   if (key->stencil) {
+      ds.ds0.stencil_enable = 1;
+      ds.ds0.stencil_func =
+	 intel_translate_compare_func(key->stencil_func[0]);
+      ds.ds0.stencil_fail_op =
+	 intel_translate_stencil_op(key->stencil_fail_op[0]);
+      ds.ds0.stencil_pass_depth_fail_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+      ds.ds0.stencil_pass_depth_pass_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
+      ds.ds1.stencil_write_mask = key->stencil_write_mask[0];
+      ds.ds1.stencil_test_mask = key->stencil_test_mask[0];
+
+      if (key->stencil_two_side) {
+	 ds.ds0.bf_stencil_enable = 1;
+	 ds.ds0.bf_stencil_func =
+	    intel_translate_compare_func(key->stencil_func[1]);
+	 ds.ds0.bf_stencil_fail_op =
+	    intel_translate_stencil_op(key->stencil_fail_op[1]);
+	 ds.ds0.bf_stencil_pass_depth_fail_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+	 ds.ds0.bf_stencil_pass_depth_pass_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
+	 ds.ds1.bf_stencil_write_mask = key->stencil_write_mask[1];
+	 ds.ds1.bf_stencil_test_mask = key->stencil_test_mask[1];
+      }
+
+      /* Not really sure about this:
+       */
+      if (key->stencil_write_mask[0] ||
+	  (key->stencil_two_side && key->stencil_write_mask[1]))
+	 ds.ds0.stencil_write_enable = 1;
+   }
+
+   /* _NEW_DEPTH */
+   if (key->depth_test) {
+      ds.ds2.depth_test_enable = 1;
+      ds.ds2.depth_test_func = intel_translate_compare_func(key->depth_func);
+      ds.ds2.depth_write_enable = key->depth_write;
+   }
+
+   bo = brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE,
+			 key, sizeof(*key),
+			 NULL, 0,
+			 &ds, sizeof(ds));
+
+   return bo;
+}
+
+static void
+prepare_depth_stencil_state(struct brw_context *brw)
+{
+   struct brw_depth_stencil_state_key key;
+
+   depth_stencil_state_populate_key(brw, &key);
+
+   dri_bo_unreference(brw->cc.depth_stencil_state_bo);
+   brw->cc.depth_stencil_state_bo = brw_search_cache(&brw->cache,
+						     BRW_DEPTH_STENCIL_STATE,
+						     &key, sizeof(key),
+						     NULL, 0,
+						     NULL);
+
+   if (brw->cc.depth_stencil_state_bo == NULL)
+      brw->cc.depth_stencil_state_bo =
+	 depth_stencil_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_depth_stencil_state = {
+   .dirty = {
+      .mesa = _NEW_DEPTH | _NEW_STENCIL,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_depth_stencil_state,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
new file mode 100644
index 0000000000..161e7b85c2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_gs_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Disable all the constant buffers. */
+   BEGIN_BATCH(5);
+   OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   if (brw->gs.prog_bo) {
+      BEGIN_BATCH(7);
+      OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+      OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+      OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+		(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+      OUT_BATCH(0); /* scratch space base offset */
+      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+		(brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+		(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+		GEN6_GS_STATISTICS_ENABLE |
+		GEN6_GS_RENDERING_ENABLE);
+      OUT_BATCH(GEN6_GS_ENABLE);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(7);
+      OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+      OUT_BATCH(0); /* prog_bo */
+      OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+		(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+      OUT_BATCH(0); /* scratch space base offset */
+      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+		(0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+		(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+		GEN6_GS_STATISTICS_ENABLE |
+		GEN6_GS_RENDERING_ENABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+}
+
+const struct brw_tracked_state gen6_gs_state = {
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_CONTEXT),
+      .cache = CACHE_NEW_GS_PROG
+   },
+   .emit = upload_gs_state,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
new file mode 100644
index 0000000000..ab8e7516d2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_sampler_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 |
+	     VS_SAMPLER_STATE_CHANGE |
+	     GS_SAMPLER_STATE_CHANGE |
+	     PS_SAMPLER_STATE_CHANGE |
+	     (4 - 2));
+   OUT_BATCH(0); /* VS */
+   OUT_BATCH(0); /* GS */
+   if (brw->wm.sampler_bo)
+      OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   else
+      OUT_BATCH(0);
+
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void
+prepare_sampler_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->wm.sampler_bo);
+}
+
+const struct brw_tracked_state gen6_sampler_state = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SAMPLER
+   },
+   .prepare = prepare_sampler_state_pointers,
+   .emit = upload_sampler_state_pointers,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
new file mode 100644
index 0000000000..2e21e5f733
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+prepare_scissor_state(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   struct gen6_scissor_state scissor;
+
+   /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
+
+   /* The scissor only needs to handle the intersection of drawable and
+    * scissor rect.  Clipping to the boundaries of static shared buffers
+    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+    *
+    * Note that the hardware's coordinates are inclusive, while Mesa's min is
+    * inclusive but max is exclusive.
+    */
+   if (render_to_fbo) {
+      /* texmemory: Y=0=bottom */
+      scissor.xmin = ctx->DrawBuffer->_Xmin;
+      scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      scissor.ymin = ctx->DrawBuffer->_Ymin;
+      scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+   }
+   else {
+      /* memory: Y=0=top */
+      scissor.xmin = ctx->DrawBuffer->_Xmin;
+      scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+      scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+   }
+
+   drm_intel_bo_unreference(brw->sf.state_bo);
+   brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT,
+				     &scissor, sizeof(scissor),
+				     NULL, 0);
+}
+
+const struct brw_tracked_state gen6_scissor_state = {
+   .dirty = {
+      .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_scissor_state,
+};
+
+static void upload_scissor_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
+   OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void prepare_scissor_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->sf.state_bo);
+}
+
+const struct brw_tracked_state gen6_scissor_state_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SF_UNIT
+   },
+   .prepare = prepare_scissor_state_pointers,
+   .emit = upload_scissor_state_pointers,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
new file mode 100644
index 0000000000..8d96b44f1d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+
+static uint32_t
+get_attr_override(struct brw_context *brw, int attr)
+{
+   uint32_t attr_override;
+   int attr_index = 0, i;
+
+   /* Find the source index (0 = first attribute after the 4D position)
+    * for this output attribute.  attr is currently a VERT_RESULT_* but should
+    * be FRAG_ATTRIB_*.
+    */
+   for (i = 0; i < attr; i++) {
+      if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i))
+	 attr_index++;
+   }
+   attr_override = attr_index;
+
+   return attr_index;
+}
+
+static void
+upload_sf_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   /* CACHE_NEW_VS_PROG */
+   uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+   /* This should probably be FS inputs read */
+   uint32_t num_outputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+   uint32_t dw1, dw2, dw3, dw4;
+   int i;
+   /* _NEW_BUFFER */
+   GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   int attr = 0;
+
+   dw1 =
+      num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
+      (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+      3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+   dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
+      GEN6_SF_STATISTICS_ENABLE;
+   dw3 = 0;
+   dw4 = 0;
+
+   /* _NEW_POLYGON */
+   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
+      dw2 |= GEN6_SF_WINDING_CCW;
+
+   /* _NEW_SCISSOR */
+   if (ctx->Scissor.Enabled)
+      dw3 |= GEN6_SF_SCISSOR_ENABLE;
+
+   /* _NEW_POLYGON */
+   if (ctx->Polygon.CullFlag) {
+      switch (ctx->Polygon.CullFaceMode) {
+      case GL_FRONT:
+	 dw3 |= GEN6_SF_CULL_BOTH;
+	 break;
+      case GL_BACK:
+	 dw3 |= GEN6_SF_CULL_BACK;
+	 break;
+      case GL_FRONT_AND_BACK:
+	 dw3 |= GEN6_SF_CULL_BOTH;
+	 break;
+      default:
+	 assert(0);
+	 break;
+      }
+   } else {
+      dw3 |= GEN6_SF_CULL_NONE;
+   }
+
+   /* _NEW_LINE */
+   dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
+      GEN6_SF_LINE_WIDTH_SHIFT;
+   if (ctx->Line.SmoothFlag) {
+      dw3 |= GEN6_SF_LINE_AA_ENABLE;
+      dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
+      dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
+   }
+
+   /* _NEW_POINT */
+   if (ctx->Point._Attenuated)
+      dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
+
+   dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) <<
+      GEN6_SF_POINT_WIDTH_SHIFT;
+   if (render_to_fbo)
+      dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
+
+   /* _NEW_LIGHT */
+   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
+      dw4 |=
+	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
+	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
+	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
+   } else {
+      dw4 |=
+	 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
+   }
+
+   BEGIN_BATCH(20);
+   OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
+   OUT_BATCH(dw1);
+   OUT_BATCH(dw2);
+   OUT_BATCH(dw3);
+   OUT_BATCH(dw4);
+   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
+   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
+   OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
+   for (i = 0; i < 8; i++) {
+      uint32_t attr_overrides = 0;
+
+      /* These should be generating FS inputs read instead of VS
+       * outputs written
+       */
+      for (; attr < 64; attr++) {
+	 if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) {
+	    attr_overrides |= get_attr_override(brw, attr);
+	    attr++;
+	    break;
+	 }
+      }
+
+      for (; attr < 64; attr++) {
+	 if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) {
+	    attr_overrides |= get_attr_override(brw, attr) << 16;
+	    attr++;
+	    break;
+	 }
+      }
+      OUT_BATCH(attr_overrides);
+   }
+   OUT_BATCH(0); /* point sprite texcoord bitmask */
+   OUT_BATCH(0); /* constant interp bitmask */
+   OUT_BATCH(0); /* wrapshortest enables 0-7 */
+   OUT_BATCH(0); /* wrapshortest enables 8-15 */
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_sf_state = {
+   .dirty = {
+      .mesa  = (_NEW_LIGHT |
+		_NEW_POLYGON |
+		_NEW_LINE |
+		_NEW_SCISSOR |
+		_NEW_BUFFERS),
+      .brw   = BRW_NEW_CONTEXT,
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .emit = upload_sf_state,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
new file mode 100644
index 0000000000..5445e4035a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+static void
+prepare_urb( struct brw_context *brw )
+{
+   brw->urb.nr_vs_entries = 24;
+   if (brw->gs.prog_bo)
+      brw->urb.nr_gs_entries = 4;
+   else
+      brw->urb.nr_gs_entries = 0;
+   /* CACHE_NEW_VS_PROG */
+   brw->urb.vs_size = MIN2(brw->vs.prog_data->urb_entry_size, 1);
+
+   /* Check that the number of URB rows (8 floats each) allocated is less
+    * than the URB space.
+    */
+   assert((brw->urb.nr_vs_entries +
+	   brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024);
+}
+
+static void
+upload_urb(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   assert(brw->urb.nr_vs_entries % 4 == 0);
+   assert(brw->urb.nr_gs_entries % 4 == 0);
+   /* GS requirement */
+   assert(!brw->gs.prog_bo || brw->urb.vs_size < 5);
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(CMD_URB << 16 | (3 - 2));
+   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
+	     ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
+   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
+	     ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_urb = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = CACHE_NEW_VS_PROG,
+   },
+   .prepare = prepare_urb,
+   .emit = upload_urb,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
new file mode 100644
index 0000000000..0c2aa4206c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+#include "main/macros.h"
+
+/* The clip VP defines the guardband region where expensive clipping is skipped
+ * and fragments are allowed to be generated and clipped out cheaply by the SF.
+ *
+ * By setting it to NDC bounds of [-1,1], we don't do GB clipping.  It's
+ * supposed to cause seams to become visible in apps due to shared edges taking
+ * different clip/no clip paths depending on whether the rest of the prim ends
+ * up in the guardband or not.
+ */
+static void
+prepare_clip_vp(struct brw_context *brw)
+{
+   struct brw_clipper_viewport vp;
+
+   vp.xmin = -1.0;
+   vp.xmax = 1.0;
+   vp.ymin = -1.0;
+   vp.ymax = 1.0;
+
+   drm_intel_bo_unreference(brw->clip.vp_bo);
+   brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP,
+				    &vp, sizeof(vp),
+				    NULL, 0);
+}
+
+const struct brw_tracked_state gen6_clip_vp = {
+   .dirty = {
+      .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_clip_vp,
+};
+
+static void
+prepare_sf_vp(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   struct brw_sf_viewport sfv;
+   GLfloat y_scale, y_bias;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+   memset(&sfv, 0, sizeof(sfv));
+
+   /* _NEW_BUFFERS */
+   if (render_to_fbo) {
+      y_scale = 1.0;
+      y_bias = 0;
+   } else {
+      y_scale = -1.0;
+      y_bias = ctx->DrawBuffer->Height;
+   }
+
+   /* _NEW_VIEWPORT */
+   sfv.viewport.m00 = v[MAT_SX];
+   sfv.viewport.m11 = v[MAT_SY] * y_scale;
+   sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+   sfv.viewport.m30 = v[MAT_TX];
+   sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+   sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+
+   drm_intel_bo_unreference(brw->sf.vp_bo);
+   brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP,
+				  &sfv, sizeof(sfv),
+				  NULL, 0);
+}
+
+const struct brw_tracked_state gen6_sf_vp = {
+   .dirty = {
+      .mesa = _NEW_VIEWPORT | _NEW_BUFFERS,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_sf_vp,
+};
+
+static void
+prepare_cc_vp(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_cc_viewport ccv;
+
+   /* _NEW_TRANSOFORM */
+   if (ctx->Transform.DepthClamp) {
+      /* _NEW_VIEWPORT */
+      ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+      ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+   } else {
+      ccv.min_depth = 0.0;
+      ccv.max_depth = 1.0;
+   }
+
+   drm_intel_bo_unreference(brw->cc.vp_bo);
+   brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv),
+				  NULL, 0);
+}
+
+const struct brw_tracked_state gen6_cc_vp = {
+   .dirty = {
+      .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_cc_vp,
+};
+
+static void prepare_viewport_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->sf.state_bo);
+}
+
+static void upload_viewport_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
+	     GEN6_CC_VIEWPORT_MODIFY |
+	     GEN6_SF_VIEWPORT_MODIFY |
+	     GEN6_CLIP_VIEWPORT_MODIFY);
+   OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_viewport_state = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = (CACHE_NEW_CLIP_VP |
+		CACHE_NEW_SF_VP |
+		CACHE_NEW_CC_VP)
+   },
+   .prepare = prepare_viewport_state_pointers,
+   .emit = upload_viewport_state_pointers,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
new file mode 100644
index 0000000000..fe597dfb94
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_vs_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   const struct brw_vertex_program *vp =
+      brw_vertex_program_const(brw->vertex_program);
+   unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
+   drm_intel_bo *constant_bo;
+   int i;
+
+   if (vp->use_const_buffer || nr_params == 0) {
+      /* Disable the push constant buffers. */
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      if (brw->vertex_program->IsNVProgram)
+	 _mesa_load_tracked_matrices(ctx);
+
+      /* Updates the ParamaterValues[i] pointers for all parameters of the
+       * basic type of PROGRAM_STATE_VAR.
+       */
+      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
+				       nr_params * 4 * sizeof(float),
+				       4096);
+      drm_intel_gem_bo_map_gtt(constant_bo);
+      for (i = 0; i < nr_params; i++) {
+	 memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
+		vp->program.Base.Parameters->ParameterValues[i],
+		4 * sizeof(float));
+      }
+      drm_intel_gem_bo_unmap_gtt(constant_bo);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
+		GEN6_CONSTANT_BUFFER_0_ENABLE |
+		(5 - 2));
+      OUT_RELOC(constant_bo,
+		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+		ALIGN(nr_params, 2) / 2 - 1);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      drm_intel_bo_unreference(constant_bo);
+   }
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   BEGIN_BATCH(6);
+   OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
+   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
+   OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
+	     GEN6_VS_STATISTICS_ENABLE);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_vs_state = {
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+                BRW_NEW_NR_VS_SURFACES |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_CONTEXT),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .emit = upload_vs_state,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
new file mode 100644
index 0000000000..1eb17ca627
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_wm_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   const struct brw_fragment_program *fp =
+      brw_fragment_program_const(brw->fragment_program);
+   unsigned int nr_params = fp->program.Base.Parameters->NumParameters;
+   drm_intel_bo *constant_bo;
+   int i;
+   uint32_t dw2, dw4, dw5, dw6;
+
+   if (fp->use_const_buffer || nr_params == 0) {
+      /* Disable the push constant buffers. */
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      /* Updates the ParamaterValues[i] pointers for all parameters of the
+       * basic type of PROGRAM_STATE_VAR.
+       */
+      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo",
+				       nr_params * 4 * sizeof(float),
+				       4096);
+      drm_intel_gem_bo_map_gtt(constant_bo);
+      for (i = 0; i < nr_params; i++) {
+	 memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
+		fp->program.Base.Parameters->ParameterValues[i],
+		4 * sizeof(float));
+      }
+      drm_intel_gem_bo_unmap_gtt(constant_bo);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 |
+		GEN6_CONSTANT_BUFFER_0_ENABLE |
+		(5 - 2));
+      OUT_RELOC(constant_bo,
+		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+		ALIGN(nr_params, 2) / 2 - 1);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      drm_intel_bo_unreference(constant_bo);
+   }
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   dw2 = dw4 = dw5 = dw6 = 0;
+   dw4 |= GEN6_WM_STATISTICS_ENABLE;
+   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
+   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
+
+   /* BRW_NEW_NR_SURFACES */
+   dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
+
+   /* CACHE_NEW_SAMPLER */
+   dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
+   dw4 |= (1 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+
+   dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
+   dw5 |= GEN6_WM_DISPATCH_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (fp->isGLSL)
+      dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
+   else
+      dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
+
+   /* _NEW_LINE */
+   if (ctx->Line.StippleFlag)
+      dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
+
+   /* _NEW_POLYGONSTIPPLE */
+   if (ctx->Polygon.StippleFlag)
+      dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
+      dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
+   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+      dw5 |= GEN6_WM_COMPUTED_DEPTH;
+
+   /* _NEW_COLOR */
+   if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
+      dw5 |= GEN6_WM_KILL_ENABLE;
+
+   /* This should probably be FS inputs read */
+   dw6 |= brw_count_bits(brw->vs.prog_data->outputs_written) <<
+      GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
+
+   BEGIN_BATCH(9);
+   OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2));
+   OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(dw2);
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH(dw4);
+   OUT_BATCH(dw5);
+   OUT_BATCH(dw6);
+   OUT_BATCH(0); /* kernel 1 pointer */
+   OUT_BATCH(0); /* kernel 2 pointer */
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_wm_state = {
+   .dirty = {
+      .mesa  = _NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+		BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_NR_WM_SURFACES |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_BATCH),
+      .cache = CACHE_NEW_SAMPLER
+   },
+   .emit = upload_wm_state,
+};
diff --git a/src/mesa/drivers/dri/i965/server/intel_dri.c b/src/mesa/drivers/dri/i965/server/intel_dri.c
deleted file mode 120000
index effdd26448..0000000000
--- a/src/mesa/drivers/dri/i965/server/intel_dri.c
+++ /dev/null
@@ -1 +0,0 @@
-../../intel/server/intel_dri.c
-\ No newline at end of file
author	Zack Rusin <zackr@vmware.com>	2010-03-15 15:24:38 -0400
committer	Zack Rusin <zackr@vmware.com>	2010-03-15 15:24:38 -0400
commit	275c4bd3643d773210780cb8d578ca84f2604684 (patch)
tree	8266edc39d4253ac0f2a0ecd41f560f3d815bb5c /src/mesa/drivers/dri/i965
parent	c5c5cd7132e18f4aad8e73d8ee879f8823c4c1e7 (diff)
parent	d0b35352ed27b1e66785c45ee95a352ed06b47ce (diff)