summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2010-07-26 17:47:59 -0700
committerEric Anholt <eric@anholt.net>2010-07-26 17:53:27 -0700
commitafe125e0a18ac3886c45c7e6b02b122fb2d327b5 (patch)
tree78621707e71154c0b388b0baacffc26432b7e992 /src/mesa/drivers/dri/i965
parentd64343f1ae84979bd154475badf11af8a9bfc2eb (diff)
parent5403ca79b225605c79f49866a6497c97da53be3b (diff)
Merge remote branch 'origin/master' into glsl2
This pulls in multiple i965 driver fixes which will help ensure better testing coverage during development, and also gets past the conflicts of the src/mesa/shader -> src/mesa/program move. Conflicts: src/mesa/Makefile src/mesa/main/shaderapi.c src/mesa/main/shaderobj.h
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.c25
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_line.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_point.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_tri.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_unfilled.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_util.c20
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h39
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c70
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h29
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c188
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_emit.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_optimize.c592
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c37
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h33
-rw-r--r--src/mesa/drivers/dri/i965/brw_util.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c225
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c14
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass0.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c4
34 files changed, 1092 insertions, 256 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 228ee3f3be..a1e9dae915 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -55,6 +55,7 @@ static void compile_clip_prog( struct brw_context *brw,
GLuint program_size;
GLuint delta;
GLuint i;
+ GLuint header_regs;
memset(&c, 0, sizeof(c));
@@ -72,22 +73,28 @@ static void compile_clip_prog( struct brw_context *brw,
c.header_position_offset = ATTR_SIZE;
if (intel->gen == 5)
- delta = 3 * REG_SIZE;
+ header_regs = 3;
else
- delta = REG_SIZE;
+ header_regs = 1;
- for (i = 0; i < VERT_RESULT_MAX; i++)
+ delta = header_regs * REG_SIZE;
+
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
if (c.key.attrs & BITFIELD64_BIT(i)) {
c.offset[i] = delta;
delta += ATTR_SIZE;
+
+ c.idx_to_attr[c.nr_attrs] = i;
+ c.nr_attrs++;
}
+ }
- c.nr_attrs = brw_count_bits(c.key.attrs);
-
- if (intel->gen == 5)
- c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
- else
- c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+ /* The vertex attributes start at a URB row-aligned offset after
+ * the 8-20 dword vertex header, and continue for a URB row-aligned
+ * length. nr_regs determines the urb_read_length from the start
+ * of the header to the end of the vertex data.
+ */
+ c.nr_regs = header_regs + (c.nr_attrs + 1) / 2;
c.nr_bytes = c.nr_regs * REG_SIZE;
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
index 68222c6c27..3a8cd7bf39 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.h
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -115,7 +115,10 @@ struct brw_clip_compile {
GLboolean need_direction;
GLuint header_position_offset;
- GLuint offset[VERT_ATTRIB_MAX];
+ /** Mapping from VERT_RESULT_* to offset within the VUE. */
+ GLuint offset[VERT_RESULT_MAX];
+ /** Mapping from attribute index to VERT_RESULT_* */
+ GLuint idx_to_attr[VERT_RESULT_MAX];
};
#define ATTR_SIZE (4*4)
diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
index ceb62a3116..4b9117bb0b 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -32,7 +32,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c
index 7f47634dca..b994a32bc3 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_point.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_point.c
@@ -32,7 +32,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index 916a99ea00..cb58d1da9f 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -32,7 +32,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
@@ -76,10 +76,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
if (c->nr_attrs & 1) {
for (j = 0; j < 3; j++) {
- GLuint delta = c->nr_attrs*16 + 32;
-
- if (intel->gen == 5)
- delta = c->nr_attrs * 16 + 32 * 3;
+ GLuint delta = c->offset[c->idx_to_attr[c->nr_attrs - 1]] + ATTR_SIZE;
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
index f36d22fdbf..afd93f8be0 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
@@ -32,7 +32,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index 2148bc8244..d2ac1235e4 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -33,7 +33,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
@@ -134,7 +134,6 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
GLboolean force_edgeflag)
{
struct brw_compile *p = &c->func;
- struct intel_context *intel = &p->brw->intel;
struct brw_reg tmp = get_tmp(c);
GLuint i;
@@ -149,12 +148,9 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
/* Iterate over each attribute (could be done in pairs?)
*/
for (i = 0; i < c->nr_attrs; i++) {
- GLuint delta = i*16 + 32;
+ GLuint delta = c->offset[c->idx_to_attr[i]];
- if (intel->gen == 5)
- delta = i * 16 + 32 * 3;
-
- if (delta == c->offset[VERT_RESULT_EDGE]) {
+ if (c->idx_to_attr[i] == VERT_RESULT_EDGE) {
if (force_edgeflag)
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
else
@@ -183,10 +179,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
}
if (i & 1) {
- GLuint delta = i*16 + 32;
-
- if (intel->gen == 5)
- delta = i * 16 + 32 * 3;
+ GLuint delta = c->offset[c->idx_to_attr[c->nr_attrs - 1]] + ATTR_SIZE;
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
}
@@ -199,11 +192,6 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
brw_clip_project_vertex(c, dest_ptr );
}
-
-
-
-#define MAX_MRF 16
-
void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_indirect vert,
GLboolean allocate,
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index d13b9ae298..6d064b822e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -34,7 +34,6 @@
#include "main/api_noop.h"
#include "main/macros.h"
#include "main/simple_list.h"
-
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 6c0b79f724..8196d8ca62 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -35,9 +35,9 @@
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
#include "brw_context.h"
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 39bf5b63fc..f7a68cead7 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -501,6 +501,10 @@
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
+/* Sandybridge is WECtrl (Write enable control) */
+#define BRW_WE_NORMAL 0
+#define BRW_WE_KILL_PRED 1
+
#define BRW_OPCODE_MOV 1
#define BRW_OPCODE_SEL 2
#define BRW_OPCODE_NOT 4
@@ -600,6 +604,8 @@
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
+#define BRW_MRF_COMPR4 (1 << 7)
+
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
@@ -646,13 +652,14 @@
#define BRW_POLYGON_FACING_BACK 1
#define BRW_MESSAGE_TARGET_NULL 0
-#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
-#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
-#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 /* sampler cache on GEN6 */
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 /* render cache on Gen6 */
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+#define BRW_MESSAGE_TARGET_CONST_CACHE 9 /* GEN6 */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
@@ -698,10 +705,24 @@
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
-#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+/* G45, GEN5 */
+#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
+#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
+/* GEN6 */
+#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
@@ -721,6 +742,16 @@
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+/* GEN6 */
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE_GEN6 7
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE_GEN6 8
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE_GEN6 9
+#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE_GEN6 10
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORLD_SCATTERED_WRITE_GEN6 11
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6 12
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE_GEN6 13
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE_GEN6 14
+
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index ff12daf497..d230714536 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -598,7 +598,7 @@ static int src_da16 (FILE *file,
format (file, ".%d", _subreg_nr);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
- string (file, ",1,1>");
+ string (file, ",4,1>");
err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
/*
* Three kinds of swizzle display:
@@ -836,10 +836,12 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
if (inst->header.opcode == BRW_OPCODE_SEND) {
int target;
- if (gen >= 5)
- target = inst->bits2.send_gen5.sfid;
+ if (gen >= 6)
+ target = inst->header.destreg__conditionalmod;
+ else if (gen == 5)
+ target = inst->bits2.send_gen5.sfid;
else
- target = inst->bits3.generic.msg_target;
+ target = inst->bits3.generic.msg_target;
newline (file);
pad (file, 16);
@@ -868,13 +870,44 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.sampler.return_format, NULL);
string (file, ")");
break;
+ case BRW_MESSAGE_TARGET_DATAPORT_READ:
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else if (gen >= 5) {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read_gen5.binding_table_index,
+ inst->bits3.dp_read_gen5.msg_control,
+ inst->bits3.dp_read_gen5.msg_type);
+ } else {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read.binding_table_index,
+ inst->bits3.dp_read.msg_control,
+ inst->bits3.dp_read.msg_type);
+ }
+ break;
case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
- format (file, " (%d, %d, %d, %d)",
- inst->bits3.dp_write.binding_table_index,
- (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
- inst->bits3.dp_write.msg_control,
- inst->bits3.dp_write.msg_type,
- inst->bits3.dp_write.send_commit_msg);
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ }
break;
case BRW_MESSAGE_TARGET_URB:
if (gen >= 5) {
@@ -900,15 +933,22 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
break;
default:
- format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+ format (file, "unsupported target %d", target);
break;
}
if (space)
string (file, " ");
- format (file, "mlen %d",
- inst->bits3.generic.msg_length);
- format (file, " rlen %d",
- inst->bits3.generic.response_length);
+ if (gen >= 5) {
+ format (file, "mlen %d",
+ inst->bits3.generic_gen5.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic_gen5.response_length);
+ } else {
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
}
pad (file, 64);
if (inst->header.opcode != BRW_OPCODE_NOP) {
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 3a32ad26c1..ffdddd0a38 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -35,7 +35,7 @@
#include "brw_structs.h"
#include "brw_defines.h"
-#include "shader/prog_instruction.h"
+#include "program/prog_instruction.h"
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
@@ -520,6 +520,20 @@ static INLINE struct brw_reg brw_acc_reg( void )
0);
}
+static INLINE struct brw_reg brw_notification_1_reg(void)
+{
+
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NOTIFICATION_COUNT,
+ 1,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ WRITEMASK_X);
+}
+
static INLINE struct brw_reg brw_flag_reg( void )
{
@@ -877,12 +891,15 @@ void brw_dp_READ_4( struct brw_compile *p,
void brw_dp_READ_4_vs( struct brw_compile *p,
struct brw_reg dest,
- GLuint oword,
- GLboolean relAddr,
- struct brw_reg addrReg,
GLuint location,
GLuint bind_table_index );
+void brw_dp_READ_4_vs_relative(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg addrReg,
+ GLuint offset,
+ GLuint bind_table_index);
+
void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
GLuint scratch_offset );
@@ -919,6 +936,8 @@ void brw_land_fwd_jump(struct brw_compile *p,
void brw_NOP(struct brw_compile *p);
+void brw_WAIT(struct brw_compile *p);
+
/* Special case: there is never a destination, execution size will be
* taken from src0:
*/
@@ -965,5 +984,7 @@ void brw_set_src1( struct brw_instruction *insn,
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 34dfe10cb9..0d5d17f501 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -364,7 +364,8 @@ static void brw_set_dp_write_message( struct brw_context *brw,
GLuint msg_length,
GLuint pixel_scoreboard_clear,
GLuint response_length,
- GLuint end_of_thread )
+ GLuint end_of_thread,
+ GLuint send_commit_msg)
{
struct intel_context *intel = &brw->intel;
brw_set_src1(insn, brw_imm_d(0));
@@ -374,7 +375,7 @@ static void brw_set_dp_write_message( struct brw_context *brw,
insn->bits3.dp_write_gen5.msg_control = msg_control;
insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
insn->bits3.dp_write_gen5.msg_type = msg_type;
- insn->bits3.dp_write_gen5.send_commit_msg = 0;
+ insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
insn->bits3.dp_write_gen5.header_present = 1;
insn->bits3.dp_write_gen5.response_length = response_length;
insn->bits3.dp_write_gen5.msg_length = msg_length;
@@ -386,7 +387,7 @@ static void brw_set_dp_write_message( struct brw_context *brw,
insn->bits3.dp_write.msg_control = msg_control;
insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
insn->bits3.dp_write.msg_type = msg_type;
- insn->bits3.dp_write.send_commit_msg = 0;
+ insn->bits3.dp_write.send_commit_msg = send_commit_msg;
insn->bits3.dp_write.response_length = response_length;
insn->bits3.dp_write.msg_length = msg_length;
insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
@@ -906,6 +907,20 @@ void brw_CMP(struct brw_compile *p,
}
}
+/* Issue a 'wait' instruction on notification register n1; the host can
+   program MMIO to wake up the thread. */
+void brw_WAIT (struct brw_compile *p)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
+ struct brw_reg src = brw_notification_1_reg();
+
+ brw_set_dest(insn, src);
+ brw_set_src0(insn, src);
+ brw_set_src1(insn, brw_null_reg());
+ insn->header.execution_size = 0; /* must */
+ insn->header.predicate_control = 0;
+ insn->header.compression_control = 0;
+}
/***********************************************************************
@@ -1040,6 +1055,7 @@ void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
GLuint scratch_offset )
{
+ struct intel_context *intel = &p->brw->intel;
GLuint msg_reg_nr = 1;
{
brw_push_insn_state(p);
@@ -1056,13 +1072,32 @@ void brw_dp_WRITE_16( struct brw_compile *p,
{
GLuint msg_length = 3;
- struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ struct brw_reg dest;
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-
+ int send_commit_msg;
+
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = msg_reg_nr;
-
+
+ /* Until gen6, writes followed by reads from the same location
+ * are not guaranteed to be ordered unless write_commit is set.
+ * If set, then a no-op write is issued to the destination
+ * register to set a dependency, and a read from the destination
+ * can be used to ensure the ordering.
+ *
+ * For gen6, only writes between different threads need ordering
+ * protection. Our use of DP writes is all about register
+ * spilling within a thread.
+ */
+ if (intel->gen >= 6) {
+ dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ send_commit_msg = 0;
+ } else {
+ dest = brw_uw16_grf(0, 0);
+ send_commit_msg = 1;
+ }
+
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
@@ -1073,8 +1108,9 @@ void brw_dp_WRITE_16( struct brw_compile *p,
BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
msg_length,
0, /* pixel scoreboard */
- 0, /* response_length */
- 0); /* eot */
+ send_commit_msg, /* response_length */
+ 0, /* eot */
+ send_commit_msg);
}
}
@@ -1115,7 +1151,7 @@ void brw_dp_READ_16( struct brw_compile *p,
brw_set_dp_read_message(p->brw,
insn,
255, /* binding table index (255=stateless) */
- 3, /* msg_control (3 means 4 Owords) */
+ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1, /* target cache (render/scratch) */
1, /* msg_length */
@@ -1190,68 +1226,107 @@ void brw_dp_READ_4( struct brw_compile *p,
*/
void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_reg dest,
- GLuint oword,
- GLboolean relAddr,
- struct brw_reg addrReg,
GLuint location,
GLuint bind_table_index)
{
+ struct brw_instruction *insn;
GLuint msg_reg_nr = 1;
+ struct brw_reg b;
- assert(oword < 2);
/*
printf("vs const read msg, location %u, msg_reg_nr %d\n",
location, msg_reg_nr);
*/
/* Setup MRF[1] with location/offset into const buffer */
- {
- struct brw_reg b;
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_push_insn_state(p);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- /*brw_set_access_mode(p, BRW_ALIGN_16);*/
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location',
+ * when the docs say only dword[2] should be set. Hmm. But it works.
+ */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /*b = get_element_ud(b, 2);*/
+ brw_MOV(p, b, brw_imm_ud(location));
- /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
- * when the docs say only dword[2] should be set. Hmmm. But it works.
- */
- b = brw_message_reg(msg_reg_nr);
- b = retype(b, BRW_REGISTER_TYPE_UD);
- /*b = get_element_ud(b, 2);*/
- if (relAddr) {
- brw_ADD(p, b, addrReg, brw_imm_ud(location));
- }
- else {
- brw_MOV(p, b, brw_imm_ud(location));
- }
+ brw_pop_insn_state(p);
- brw_pop_insn_state(p);
- }
+ insn = next_insn(p, BRW_OPCODE_SEND);
- {
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-
- insn->header.predicate_control = BRW_PREDICATE_NONE;
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditionalmod = msg_reg_nr;
- insn->header.mask_control = BRW_MASK_DISABLE;
- /*insn->header.access_mode = BRW_ALIGN_16;*/
-
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dp_read_message(p->brw,
- insn,
- bind_table_index,
- oword, /* 0 = lower Oword, 1 = upper Oword */
- BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- 0, /* source cache = data cache */
- 1, /* msg_length */
- 1, /* response_length (1 Oword) */
- 0); /* eot */
- }
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ 0,
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+}
+
+/**
+ * Read a float[4] constant per vertex from VS constant buffer, with
+ * relative addressing.
+ */
+void brw_dp_READ_4_vs_relative(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg addr_reg,
+ GLuint offset,
+ GLuint bind_table_index)
+{
+ struct intel_context *intel = &p->brw->intel;
+ int msg_type;
+
+ /* Setup MRF[1] with offset into const buffer */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* M1.0 is block offset 0, M1.4 is block offset 1, all other
+ * fields ignored.
+ */
+ brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
+ addr_reg, brw_imm_d(offset));
+ brw_pop_insn_state(p);
+
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = 0;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_vec8_grf(0, 0));
+
+ if (intel->gen == 6)
+ msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+ else if (intel->gen == 5 || intel->is_g4x)
+ msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+ else
+ msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+ msg_type,
+ 0, /* source cache = data cache */
+ 2, /* msg_length */
+ 1, /* response_length */
+ 0); /* eot */
}
@@ -1281,7 +1356,8 @@ void brw_fb_WRITE(struct brw_compile *p,
msg_length,
1, /* pixel scoreboard */
response_length,
- eot);
+ eot,
+ 0 /* send_commit_msg */);
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 99a6f6be11..a01d5576f8 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -34,7 +34,7 @@
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c
index e79b3ddea3..8aa6fb6cc6 100644
--- a/src/mesa/drivers/dri/i965/brw_optimize.c
+++ b/src/mesa/drivers/dri/i965/brw_optimize.c
@@ -26,12 +26,600 @@
*/
#include "main/macros.h"
-#include "shader/program.h"
-#include "shader/prog_print.h"
+#include "program/program.h"
+#include "program/prog_print.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
+static const struct { /* Per-opcode metadata, indexed by the BRW_OPCODE_* value
+ * stored in inst->header.opcode. Opcodes with no entry
+ * below are zero-initialized (NULL name, nsrc = ndst = 0,
+ * is_arith = 0). */
+ char *name; /* short lowercase mnemonic string */
+ int nsrc; /* number of source operands examined by brw_is_grf_read() */
+ int ndst; /* 0 makes the brw_is_*_written() helpers report "not written" */
+ GLboolean is_arith; /* value returned by brw_is_arithmetic_inst() */
+} inst_opcode[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+/* Look up the is_arith flag that inst_opcode records for the opcode of
+ * `inst`.
+ */
+static INLINE GLboolean
+brw_is_arithmetic_inst(const struct brw_instruction *inst)
+{
+   const int opcode = inst->header.opcode;
+
+   return inst_opcode[opcode].is_arith;
+}
+
+/* Maps a stride field encoding (0..6) to the stride it stands for, counted
+ * in elements: callers multiply the result by the element size in bytes.
+ */
+static const GLuint inst_stride[7] = { 0, 1, 2, 4, 8, 16, 32 };
+
+static const GLuint inst_type_size[8] = { /* Element size, in bytes, of each
+ * BRW_REGISTER_TYPE_* named below;
+ * a slot with no initializer is 0. */
+ [BRW_REGISTER_TYPE_UD] = 4,
+ [BRW_REGISTER_TYPE_D] = 4,
+ [BRW_REGISTER_TYPE_UW] = 2,
+ [BRW_REGISTER_TYPE_W] = 2,
+ [BRW_REGISTER_TYPE_UB] = 1,
+ [BRW_REGISTER_TYPE_B] = 1,
+ [BRW_REGISTER_TYPE_F] = 4
+};
+
+/* Tell whether `inst` may write any byte of the GRF window that starts at
+ * register `reg_index` and is `size` bytes long.
+ *
+ * - Opcodes whose inst_opcode entry has ndst == 0 never write.
+ * - An indirectly addressed GRF destination is conservatively reported as
+ *   written, since the target register is not known here.
+ * - For SEND the written span is the response length in whole registers,
+ *   read from the gen5+ or the older descriptor layout according to `gen`.
+ * - Otherwise the span is execution size * element size * horizontal stride.
+ */
+static INLINE GLboolean
+brw_is_grf_written(const struct brw_instruction *inst,
+                   int reg_index, int size,
+                   int gen)
+{
+   if (inst_opcode[inst->header.opcode].ndst == 0)
+      return GL_FALSE;
+
+   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+      if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE)
+         return GL_TRUE;
+
+   if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
+      return GL_FALSE;
+
+   const int reg_start = reg_index * REG_SIZE;
+   const int reg_end = reg_start + size;
+
+   const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
+   const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
+                         + inst->bits1.da1.dest_subreg_nr;
+   int length, write_end;
+
+   /* SEND is specific: it writes back response_length whole registers */
+   if (inst->header.opcode == BRW_OPCODE_SEND) {
+      if (gen >= 5)
+         length = inst->bits3.generic_gen5.response_length*REG_SIZE;
+      else
+         length = inst->bits3.generic.response_length*REG_SIZE;
+   }
+   else {
+      length = 1 << inst->header.execution_size;
+      length *= type_size;
+      /* dest_horiz_stride holds the stride *encoding*; decode it through
+       * inst_stride (encoding 3 stands for a stride of 4) so the written
+       * span is not under-estimated.
+       */
+      length *= inst_stride[inst->bits1.da1.dest_horiz_stride];
+   }
+
+   /* If the two intervals intersect, we overwrite the register */
+   write_end = write_start + length;
+   const int left = MAX2(write_start, reg_start);
+   const int right = MIN2(write_end, reg_end);
+
+   return left < right;
+}
+
+/* Tell whether `inst` may write any byte of the MRF window that starts at
+ * message register `reg_index` and is `size` bytes long. Message registers
+ * get their own path because the compr4 destination form must be handled.
+ *
+ * - Opcodes whose inst_opcode entry has ndst == 0 never write.
+ * - An indirectly addressed MRF destination is conservatively reported as
+ *   written.
+ * - A compr4 destination with an execution size other than 16 is also
+ *   conservatively reported as written.
+ * - A SEND whose src0 register file is non-zero is treated as implicitly
+ *   writing the message register named by header.destreg__conditionalmod.
+ *   This is checked whatever SEND's own destination file is; a SEND that
+ *   writes its response to a non-MRF register must still be caught.
+ *   NOTE(review): this function has no `gen` parameter, so the implicit-move
+ *   rule is applied on every generation -- confirm this is merely
+ *   conservative on hardware without the implicit move.
+ */
+static INLINE GLboolean
+brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
+{
+   if (inst_opcode[inst->header.opcode].ndst == 0)
+      return GL_FALSE;
+
+   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+      if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE)
+         return GL_TRUE;
+
+   const int reg_start = reg_index * REG_SIZE;
+   const int reg_end = reg_start + size;
+
+   GLboolean is_written = GL_FALSE;
+
+   if (inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE) {
+      const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f;
+      const int is_compr4 = inst->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4;
+      const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
+      /* dest_horiz_stride is the stride encoding; decode it before use */
+      const int hstride = inst_stride[inst->bits1.da1.dest_horiz_stride];
+
+      /* We use compr4 with a size != 16 elements. Strange, we conservatively
+       * consider that we are writing the register.
+       */
+      if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16)
+         return GL_TRUE;
+
+      /* Here we write mrf_{i} and mrf_{i+4}, i.e. two times 8 elements */
+      if (is_compr4) {
+         const int length = 8 * type_size * hstride;
+
+         /* First 8-way register */
+         const int write_start0 = mrf_index*REG_SIZE
+                                + inst->bits1.da1.dest_subreg_nr;
+         const int write_end0 = write_start0 + length;
+
+         /* Second 8-way register */
+         const int write_start1 = (mrf_index+4)*REG_SIZE
+                                + inst->bits1.da1.dest_subreg_nr;
+         const int write_end1 = write_start1 + length;
+
+         /* If the two intervals intersect, we overwrite the register */
+         const int left0 = MAX2(write_start0, reg_start);
+         const int right0 = MIN2(write_end0, reg_end);
+         const int left1 = MAX2(write_start1, reg_start);
+         const int right1 = MIN2(write_end1, reg_end);
+
+         is_written = left0 < right0 || left1 < right1;
+      }
+      else {
+         int length;
+         length = 1 << inst->header.execution_size;
+         length *= type_size;
+         length *= hstride;
+
+         /* If the two intervals intersect, we write into the register */
+         const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
+                               + inst->bits1.da1.dest_subreg_nr;
+         const int write_end = write_start + length;
+         const int left = MAX2(write_start, reg_start);
+         const int right = MIN2(write_end, reg_end);
+
+         is_written = left < right;
+      }
+   }
+
+   /* SEND may perform an implicit mov to a mrf register */
+   if (is_written == GL_FALSE &&
+       inst->header.opcode == BRW_OPCODE_SEND &&
+       inst->bits1.da1.src0_reg_file != 0) {
+
+      const int mrf_start = inst->header.destreg__conditionalmod;
+      const int write_start = mrf_start * REG_SIZE;
+      const int write_end = write_start + REG_SIZE;
+      const int left = MAX2(write_start, reg_start);
+      const int right = MIN2(write_end, reg_end);
+      is_written = left < right;
+   }
+
+   return is_written;
+}
+
+/* Tell whether `inst` may read any byte of the MRF window that starts at
+ * message register `reg_index` and is `size` bytes long.
+ *
+ * Only SEND is considered; a SEND whose src0 is not directly addressed is
+ * conservatively reported as reading. The message spans msg_length registers
+ * (taken from the gen5+ or the older descriptor layout according to `gen`)
+ * starting at header.destreg__conditionalmod. When src0's register file is
+ * zero, the start moves one register further and the length shrinks by one.
+ *
+ * NOTE(review): that shrink is applied after the multiplication by REG_SIZE,
+ * so it removes one byte, not one register, and it is applied in the branch
+ * where src0's file is zero. Neither obviously matches the stated intent
+ * ("with an implicit mov we read one less register") -- confirm against the
+ * SEND payload rules before relying on it.
+ */
+static INLINE GLboolean
+brw_is_mrf_read(const struct brw_instruction *inst,
+                int reg_index, int size, int gen)
+{
+   int msg_bytes, first_byte, end_byte;
+
+   if (inst->header.opcode != BRW_OPCODE_SEND)
+      return GL_FALSE;
+   if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
+      return GL_TRUE;
+
+   if (gen < 5)
+      msg_bytes = inst->bits3.generic.msg_length*REG_SIZE;
+   else
+      msg_bytes = inst->bits3.generic_gen5.msg_length*REG_SIZE;
+
+   first_byte = inst->header.destreg__conditionalmod;
+   if (inst->bits1.da1.src0_reg_file == 0) {
+      msg_bytes--;
+      first_byte++;
+   }
+   first_byte *= REG_SIZE;
+   end_byte = first_byte + msg_bytes;
+
+   const int window_start = reg_index*REG_SIZE;
+   const int window_end = window_start + size;
+
+   /* Non-empty intersection of [first_byte, end_byte) and the window */
+   return MAX2(first_byte, window_start) < MIN2(end_byte, window_end);
+}
+
+/* Walk a directly addressed source region element by element and tell
+ * whether any element touches the byte interval [reg_start, reg_end).
+ *
+ * region_start: byte offset of the first element (reg_nr*REG_SIZE + subreg)
+ * type_size:    element size in bytes
+ * elem_num:     number of elements processed (1 << execution_size)
+ * width_log2:   raw width field; a row holds 1 << width_log2 elements
+ * hs, vs:       horizontal / vertical strides, already converted to bytes
+ */
+static INLINE GLboolean
+brw_grf_region_overlaps(int region_start, int type_size,
+                        int elem_num, int width_log2,
+                        int hs, int vs,
+                        int reg_start, int reg_end)
+{
+   const int width = 1 << width_log2;
+   const int row_num = elem_num >> width_log2;
+   int row_start = region_start;
+   int i, j;
+
+   for (j = 0; j < row_num; ++j) {
+      int elem_start = row_start;
+      for (i = 0; i < width; ++i) {
+         const int elem_end = elem_start + type_size;
+         if (MAX2(elem_start, reg_start) < MIN2(elem_end, reg_end))
+            return GL_TRUE;
+         elem_start += hs;
+      }
+      row_start += vs;
+   }
+
+   return GL_FALSE;
+}
+
+/* Tell whether `inst` may read any byte of the GRF window that starts at
+ * register `reg_index` and is `size` bytes long.
+ *
+ * The number of sources examined comes from inst_opcode[].nsrc. For each
+ * source: an indirectly addressed GRF operand is conservatively reported as
+ * read; a directly addressed GRF operand has its register region walked
+ * element by element; an operand in any other register file is skipped.
+ * A non-GRF first source no longer ends the check early, so the second
+ * source is always examined when the opcode has one.
+ *
+ * We are a bit too conservative in one respect only: the walk tests every
+ * element the region describes and returns on the first hit.
+ */
+static INLINE GLboolean
+brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
+{
+   const int nsrc = inst_opcode[inst->header.opcode].nsrc;
+   const int reg_start = reg_index*REG_SIZE;
+   const int reg_end = reg_start + size;
+   const int elem_num = 1 << inst->header.execution_size;
+
+   /* First source register */
+   if (nsrc >= 1) {
+      if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT) {
+         if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE)
+            return GL_TRUE;
+      }
+      else if (inst->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE) {
+         const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type];
+         const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride];
+         const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride];
+         const int region_start = inst->bits2.da1.src0_reg_nr*REG_SIZE
+                                + inst->bits2.da1.src0_subreg_nr;
+
+         if (brw_grf_region_overlaps(region_start, type_size, elem_num,
+                                     inst->bits2.da1.src0_width,
+                                     hs, vs, reg_start, reg_end))
+            return GL_TRUE;
+      }
+   }
+
+   /* Second source register */
+   if (nsrc >= 2) {
+      if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT) {
+         if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE)
+            return GL_TRUE;
+      }
+      else if (inst->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE) {
+         const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type];
+         const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride];
+         const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride];
+         const int region_start = inst->bits3.da1.src1_reg_nr*REG_SIZE
+                                + inst->bits3.da1.src1_subreg_nr;
+
+         if (brw_grf_region_overlaps(region_start, type_size, elem_num,
+                                     inst->bits3.da1.src1_width,
+                                     hs, vs, reg_start, reg_end))
+            return GL_TRUE;
+      }
+   }
+
+   return GL_FALSE;
+}
+
+/* Report whether any of the header fields dependency_control,
+ * thread_control, mask_control, saturate or debug_control is non-zero.
+ */
+static INLINE GLboolean
+brw_is_control_done(const struct brw_instruction *mov)
+{
+   if (mov->header.dependency_control != 0)
+      return GL_TRUE;
+   if (mov->header.thread_control != 0)
+      return GL_TRUE;
+   if (mov->header.mask_control != 0)
+      return GL_TRUE;
+   if (mov->header.saturate != 0)
+      return GL_TRUE;
+   if (mov->header.debug_control != 0)
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+/* Report whether the instruction carries a non-zero predicate_control. */
+static INLINE GLboolean
+brw_is_predicated(const struct brw_instruction *mov)
+{
+   if (mov->header.predicate_control == 0)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/* Recognize a "straight" GRF-to-MRF move and extract its operands.
+ *
+ * Returns GL_TRUE only when `mov` is a MOV that is unpredicated, has none of
+ * the control bits tested by brw_is_control_done() set, writes a directly
+ * addressed float MRF at sub-register 0 with horizontal stride 1, and reads
+ * a directly addressed float GRF at sub-register 0 with an <8;8,1> region
+ * and no abs/negate modifier.
+ *
+ * On success:
+ *   *grf_index  receives the source GRF number,
+ *   *mrf_index  receives the destination MRF number (low 4 bits),
+ *   *is_compr4  tells whether the BRW_MRF_COMPR4 bit is set on the
+ *               destination.
+ * On failure the out-parameters are left untouched.
+ *
+ * The opcode test matters: without it any instruction with this operand
+ * shape (an ADD into an MRF, say) would be mistaken for a plain copy and
+ * could be deleted by the callers.
+ */
+static INLINE GLboolean
+brw_is_grf_to_mrf_mov(const struct brw_instruction *mov,
+                      int *mrf_index,
+                      int *grf_index,
+                      GLboolean *is_compr4)
+{
+   if (mov->header.opcode != BRW_OPCODE_MOV)
+      return GL_FALSE;
+
+   /* brw_is_control_done() already covers debug_control */
+   if (brw_is_predicated(mov) ||
+       brw_is_control_done(mov))
+      return GL_FALSE;
+
+   if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT ||
+       mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE ||
+       mov->bits1.da1.dest_reg_type != BRW_REGISTER_TYPE_F ||
+       mov->bits1.da1.dest_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+       mov->bits1.da1.dest_subreg_nr != 0)
+      return GL_FALSE;
+
+   if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
+       mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE ||
+       mov->bits1.da1.src0_reg_type != BRW_REGISTER_TYPE_F ||
+       mov->bits2.da1.src0_width != BRW_WIDTH_8 ||
+       mov->bits2.da1.src0_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+       mov->bits2.da1.src0_vert_stride != BRW_VERTICAL_STRIDE_8 ||
+       mov->bits2.da1.src0_subreg_nr != 0 ||
+       mov->bits2.da1.src0_abs != 0 ||
+       mov->bits2.da1.src0_negate != 0)
+      return GL_FALSE;
+
+   *grf_index = mov->bits2.da1.src0_reg_nr;
+   *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f;
+   *is_compr4 = (mov->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4) != 0;
+   return GL_TRUE;
+}
+
+/* Tell whether `inst` is an arithmetic instruction that writes GRF
+ * `grf_index` in the plain form the MRF-move elimination can retarget:
+ * align1 access, execution_size field equal to 4, directly addressed float
+ * destination at sub-register 0 with horizontal stride 1, and no control
+ * bits set. Predication is tolerated only on SEL.
+ * NOTE(review): the literal 4 is presumably BRW_EXECUTE_16 -- confirm.
+ */
+static INLINE GLboolean
+brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index)
+{
+   /* remark: no problem to predicate a SEL instruction */
+   if (brw_is_predicated(inst) && inst->header.opcode != BRW_OPCODE_SEL)
+      return GL_FALSE;
+
+   if (brw_is_control_done(inst) != GL_FALSE)
+      return GL_FALSE;
+
+   if (inst->header.execution_size != 4 ||
+       inst->header.access_mode != BRW_ALIGN_1)
+      return GL_FALSE;
+
+   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT ||
+       inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE ||
+       inst->bits1.da1.dest_reg_type != BRW_REGISTER_TYPE_F ||
+       inst->bits1.da1.dest_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+       inst->bits1.da1.dest_reg_nr != grf_index ||
+       inst->bits1.da1.dest_subreg_nr != 0)
+      return GL_FALSE;
+
+   if (!brw_is_arithmetic_inst(inst))
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/* Compare two instructions as four consecutive GLuint words; they are equal
+ * only when all four words match.
+ */
+static INLINE GLboolean
+brw_inst_are_equal(const struct brw_instruction *src0,
+                   const struct brw_instruction *src1)
+{
+   const GLuint *words0 = (GLuint *) src0;
+   const GLuint *words1 = (GLuint *) src1;
+   int w;
+
+   for (w = 0; w < 4; ++w) {
+      if (words0[w] != words1[w])
+         return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+/* Copy one instruction over another, as four consecutive GLuint words. */
+static INLINE void
+brw_inst_copy(struct brw_instruction *dst,
+              const struct brw_instruction *src)
+{
+   GLuint *words_dst = (GLuint *) dst;
+   const GLuint *words_src = (GLuint *) src;
+   int w;
+
+   for (w = 0; w < 4; ++w)
+      words_dst[w] = words_src[w];
+}
+
+/* Compact p->store in place: every instruction whose removeInst[] flag is
+ * set is dropped, the survivors keep their relative order, and p->nr_insn is
+ * updated to the number of survivors. removeInst must hold one flag per
+ * instruction currently in p->store.
+ */
+static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst)
+{
+   int src, kept = 0;
+
+   for (src = 0; src < p->nr_insn; ++src) {
+      if (!removeInst[src]) {
+         /* Nothing to move until the first removed slot has been passed */
+         if (kept != src)
+            brw_inst_copy(p->store + kept, p->store + src);
+         kept++;
+      }
+   }
+
+   /* `kept` was bumped exactly once per surviving instruction */
+   p->nr_insn = kept;
+}
+
+/* The gen code emitter generates a lot of duplication in the
+ * grf-to-mrf moves, for example when texture sampling with the same
+ * coordinates from multiple textures. Here, we track identical
+ * grf-to-mrf mov instructions and remove repeated ones where the operands
+ * and dst haven't changed in between.
+ *
+ * For each straight grf-to-mrf mov, later instructions are scanned in
+ * program order: a bit-identical instruction is flagged for removal, and the
+ * scan stops at the first instruction that may write the source GRF (a
+ * SIMD16-sized window) or either destination MRF. Flagged instructions are
+ * then dropped by brw_remove_inst().
+ *
+ * If the scratch flag array cannot be allocated the pass is skipped, which
+ * leaves the program unchanged.
+ */
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p)
+{
+   const int gen = p->brw->intel.gen;
+   int i, j;
+
+   GLboolean *removeInst = calloc(p->nr_insn, sizeof(GLboolean));
+   if (removeInst == NULL)
+      return;
+
+   for (i = 0; i < p->nr_insn; i++) {
+      if (removeInst[i])
+         continue;
+
+      const struct brw_instruction *mov = p->store + i;
+      int mrf_index, grf_index;
+      GLboolean is_compr4;
+
+      /* Only consider _straight_ grf-to-mrf moves */
+      if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
+         continue;
+
+      /* With compr4 the second half lands four registers further */
+      const int mrf_index0 = mrf_index;
+      const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1;
+      const int simd16_size = 2 * REG_SIZE;
+
+      for (j = i + 1; j < p->nr_insn; j++) {
+         const struct brw_instruction *inst = p->store + j;
+
+         if (brw_inst_are_equal(mov, inst)) {
+            removeInst[j] = GL_TRUE;
+            continue;
+         }
+
+         /* Source or destination clobbered: later copies are not redundant */
+         if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
+             brw_is_mrf_written(inst, mrf_index0, REG_SIZE) ||
+             brw_is_mrf_written(inst, mrf_index1, REG_SIZE))
+            break;
+      }
+   }
+
+   brw_remove_inst(p, removeInst);
+   free(removeInst);
+}
+
+/* Replace moves to MRFs where the value moved is the result of a
+ * normal arithmetic operation with computation right into the MRF.
+ *
+ * For each straight grf-to-mrf mov, the nearest earlier instruction that
+ * writes the source GRF in "straight" form is located. The mov is removed
+ * and that instruction retargeted to the MRF only if:
+ *   - between the two, nothing reads or writes the GRF or either MRF, and
+ *   - after the mov, the GRF is not read before it is straight-written
+ *     again (or the program ends).
+ *
+ * If the scratch flag array cannot be allocated the pass is skipped, which
+ * leaves the program unchanged.
+ */
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p)
+{
+   int i, j, prev;
+   struct brw_context *brw = p->brw;
+   const int gen = brw->intel.gen;
+   const int simd16_size = 2*REG_SIZE;
+
+   GLboolean *removeInst = calloc(p->nr_insn, sizeof(GLboolean));
+   if (removeInst == NULL)
+      return;
+
+   for (i = 0; i < p->nr_insn; i++) {
+      if (removeInst[i])
+         continue;
+
+      struct brw_instruction *grf_inst = NULL;
+      const struct brw_instruction *mov = p->store + i;
+      int mrf_index, grf_index;
+      GLboolean is_compr4;
+
+      /* Only consider _straight_ grf-to-mrf moves */
+      if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
+         continue;
+
+      /* Using compr4 enables a stride of 4 for this instruction */
+      const int mrf_index0 = mrf_index;
+      const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1;
+
+      /* Look where the register has been set */
+      prev = i;
+      GLboolean potential_remove = GL_FALSE;
+      while (prev--) {
+
+         /* If _one_ instruction writes the grf, we try to remove the mov */
+         struct brw_instruction *inst = p->store + prev;
+         if (brw_is_grf_straight_write(inst, grf_index)) {
+            potential_remove = GL_TRUE;
+            grf_inst = inst;
+            break;
+         }
+
+      }
+
+      if (potential_remove == GL_FALSE)
+         continue;
+      removeInst[i] = GL_TRUE;
+
+      /* Monitor first the section of code between the grf computation and the
+       * mov. Here neither the mrf nor the grf register may be read or written
+       */
+      for (j = prev + 1; j < i; ++j) {
+         struct brw_instruction *inst = p->store + j;
+         if (removeInst[j])
+            continue;
+         if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
+             brw_is_grf_read(inst, grf_index, simd16_size) ||
+             brw_is_mrf_written(inst, mrf_index0, REG_SIZE) ||
+             brw_is_mrf_written(inst, mrf_index1, REG_SIZE) ||
+             brw_is_mrf_read(inst, mrf_index0, REG_SIZE, gen) ||
+             brw_is_mrf_read(inst, mrf_index1, REG_SIZE, gen)) {
+            removeInst[i] = GL_FALSE;
+            break;
+         }
+      }
+
+      /* After the mov, we can read or write the mrf. If the grf is overwritten,
+       * we are done
+       */
+      for (j = i + 1; j < p->nr_insn; ++j) {
+         struct brw_instruction *inst = p->store + j;
+         if (removeInst[j])
+            continue;
+
+         /* The GRF value is still needed later: the mov must stay */
+         if (brw_is_grf_read(inst, grf_index, simd16_size)) {
+            removeInst[i] = GL_FALSE;
+            break;
+         }
+
+         if (brw_is_grf_straight_write(inst, grf_index))
+            break;
+      }
+
+      /* Note that with the top down traversal, we can safely patch the
+       * instruction that computed the grf so that it targets the mrf
+       */
+      if (removeInst[i]) {
+         grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file;
+         grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr;
+      }
+   }
+
+   brw_remove_inst(p, removeInst);
+   free(removeInst);
+}
+
static GLboolean
is_single_channel_dp4(struct brw_instruction *insn)
{
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index bd560acdad..4b08d2599b 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -31,10 +31,10 @@
#include "main/imports.h"
#include "main/enums.h"
-#include "shader/prog_parameter.h"
-#include "shader/program.h"
-#include "shader/programopt.h"
-#include "shader/shader_api.h"
+#include "main/shaderobj.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+#include "program/programopt.h"
#include "tnl/tnl.h"
#include "brw_context.h"
@@ -174,9 +174,36 @@ static GLboolean brwProgramStringNotify( GLcontext *ctx,
shader_error(ctx, prog,
"i965 driver doesn't yet support uninlined function "
"calls. Move to using a single return statement at "
- "the end of the function to work around it.");
+ "the end of the function to work around it.\n");
return GL_FALSE;
}
+ if (prog->Instructions[i].DstReg.RelAddr &&
+ prog->Instructions[i].DstReg.File == PROGRAM_INPUT) {
+ shader_error(ctx, prog,
+ "Variable indexing of shader inputs unsupported\n");
+ return GL_FALSE;
+ }
+ if (prog->Instructions[i].DstReg.RelAddr &&
+ prog->Instructions[i].DstReg.File == PROGRAM_OUTPUT) {
+ shader_error(ctx, prog,
+ "Variable indexing of shader outputs unsupported\n");
+ return GL_FALSE;
+ }
+ if (target == GL_FRAGMENT_PROGRAM_ARB) {
+ if ((prog->Instructions[i].DstReg.RelAddr &&
+ prog->Instructions[i].DstReg.File == PROGRAM_TEMPORARY) ||
+ (prog->Instructions[i].SrcReg[0].RelAddr &&
+ prog->Instructions[i].SrcReg[0].File == PROGRAM_TEMPORARY) ||
+ (prog->Instructions[i].SrcReg[1].RelAddr &&
+ prog->Instructions[i].SrcReg[1].File == PROGRAM_TEMPORARY) ||
+ (prog->Instructions[i].SrcReg[2].RelAddr &&
+ prog->Instructions[i].SrcReg[2].File == PROGRAM_TEMPORARY)) {
+ shader_error(ctx, prog,
+ "Variable indexing of variable arrays in the FS "
+ "unsupported\n");
+ return GL_FALSE;
+ }
+ }
}
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
index a0680a56f2..e525c730d3 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -34,7 +34,7 @@
#define BRW_SF_H
-#include "shader/program.h"
+#include "program/program.h"
#include "brw_context.h"
#include "brw_eu.h"
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index e290ca92f6..914f275cc6 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -130,7 +130,7 @@ struct brw_sf_unit_key {
unsigned scissor:1;
unsigned line_smooth:1;
unsigned point_sprite:1;
- unsigned point_attenuated:1;
+ unsigned use_vs_point_size:1;
unsigned render_to_fbo:1;
float line_width;
float point_size;
@@ -164,7 +164,8 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
key->point_sprite = ctx->Point.PointSprite;
key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
- key->point_attenuated = ctx->Point._Attenuated;
+ key->use_vs_point_size = (ctx->VertexProgram.PointSizeEnabled ||
+ ctx->Point._Attenuated);
/* _NEW_LIGHT */
key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
@@ -296,7 +297,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
/* _NEW_POINT */
sf.sf7.sprite_point = key->point_sprite;
sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
- sf.sf7.use_point_size_state = !key->point_attenuated;
+ sf.sf7.use_point_size_state = !key->use_vs_point_size;
sf.sf7.aa_line_distance_mode = 0;
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 2a7fa5b699..2fde42a706 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1657,8 +1657,36 @@ struct brw_instruction
GLuint end_of_thread:1;
} dp_write_gen5;
+ /* Sandybridge DP for sample cache, constant cache, render cache */
struct {
- GLuint pad:16;
+ GLuint binding_table_index:8;
+ GLuint msg_control:5;
+ GLuint msg_type:3;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_sampler_const_cache;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint slot_group_select:1;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:4;
+ GLuint send_commit_msg:1;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_render_cache;
+
+ struct {
+ GLuint function_control:16;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
@@ -1666,8 +1694,9 @@ struct brw_instruction
GLuint end_of_thread:1;
} generic;
+ /* Of this struct, only end_of_thread is not present for gen6. */
struct {
- GLuint pad:19;
+ GLuint function_control:19;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c
index bba9249d1b..1db2a210d4 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -31,7 +31,7 @@
#include "main/mtypes.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_parameter.h"
#include "brw_util.h"
#include "brw_defines.h"
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 3c12f11ea7..9a832af9a9 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -34,8 +34,8 @@
#include "brw_vs.h"
#include "brw_util.h"
#include "brw_state.h"
-#include "shader/prog_print.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_parameter.h"
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 6493744f3e..9338a6b7db 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -36,7 +36,7 @@
#include "brw_context.h"
#include "brw_eu.h"
-#include "shader/program.h"
+#include "program/program.h"
struct brw_vs_prog_key {
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 128987d78a..c1d6525e9b 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -31,9 +31,9 @@
#include "main/macros.h"
-#include "shader/program.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
+#include "program/program.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
#include "brw_context.h"
#include "brw_vs.h"
@@ -44,6 +44,7 @@ static GLboolean
brw_vs_arg_can_be_immediate(enum prog_opcode opcode, int arg)
{
int opcode_array[] = {
+ [OPCODE_MOV] = 1,
[OPCODE_ADD] = 2,
[OPCODE_CMP] = 3,
[OPCODE_DP3] = 2,
@@ -218,7 +219,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->first_overflow_output = 0;
if (intel->gen >= 6)
- mrf = 6;
+ mrf = 4;
else if (intel->gen == 5)
mrf = 8;
else
@@ -238,12 +239,25 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf++; /* just a placeholder? XXX fix later stages & remove this */
}
else {
- if (mrf < 16) {
+ /* Two restrictions on our compute-to-MRF here. The
+ * message length for all SEND messages is restricted to
+ * [1,15], so we can't use mrf 15, as that means a length
+ * of 16.
+ *
+ * Additionally, URB writes are aligned to URB rows, so we
+ * need to put an even number of registers of URB data in
+ * each URB write so that the later write is aligned. A
+ * message length of 15 means 1 message header reg plus 14
+ * regs of URB data.
+ *
+ * For attributes beyond the compute-to-MRF, we compute to
+ * GRFs and they will be written in the second URB_WRITE.
+ */
+ if (mrf < 15) {
c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
mrf++;
}
else {
- /* too many vertex results to fit in MRF, use GRF for overflow */
if (!c->first_overflow_output)
c->first_overflow_output = i;
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
@@ -318,8 +332,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+ /* See emit_vertex_write() for where the VUE's overhead on top of the
+ * attributes comes from.
+ */
if (intel->gen >= 6)
- c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8;
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 7) / 8;
else if (intel->gen == 5)
c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
@@ -869,8 +886,6 @@ get_constant(struct brw_vs_compile *c,
assert(argIndex < 3);
if (c->current_const[argIndex].index != src->Index) {
- struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
-
/* Keep track of the last constant loaded in this slot, for reuse. */
c->current_const[argIndex].index = src->Index;
@@ -881,9 +896,6 @@ get_constant(struct brw_vs_compile *c,
/* need to fetch the constant now */
brw_dp_READ_4_vs(p,
const_reg, /* writeback dest */
- 0, /* oword */
- 0, /* relative indexing? */
- addrReg, /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
@@ -904,8 +916,8 @@ get_reladdr_constant(struct brw_vs_compile *c,
const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
struct brw_reg const_reg = c->current_const[argIndex].reg;
- struct brw_reg const2_reg;
struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg byte_addr_reg = get_tmp(c);
assert(argIndex < 3);
@@ -917,37 +929,15 @@ get_reladdr_constant(struct brw_vs_compile *c,
src->Index, argIndex, c->current_const[argIndex].reg.nr);
#endif
+ brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16));
+
/* fetch the first vec4 */
- brw_dp_READ_4_vs(p,
- const_reg, /* writeback dest */
- 0, /* oword */
- 1, /* relative indexing? */
- addrReg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
- );
- /* second vec4 */
- const2_reg = get_tmp(c);
-
- /* use upper half of address reg for second read */
- addrReg = stride(addrReg, 0, 4, 0);
- addrReg.subnr = 16;
-
- brw_dp_READ_4_vs(p,
- const2_reg, /* writeback dest */
- 1, /* oword */
- 1, /* relative indexing? */
- addrReg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER
- );
-
- /* merge the two Owords into the constant register */
- /* const_reg[7..4] = const2_reg[7..4] */
- brw_MOV(p,
- suboffset(stride(const_reg, 0, 4, 1), 4),
- suboffset(stride(const2_reg, 0, 4, 1), 4));
- release_tmp(c, const2_reg);
+ brw_dp_READ_4_vs_relative(p,
+ const_reg, /* writeback dest */
+ byte_addr_reg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
return const_reg;
}
@@ -993,36 +983,71 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
*/
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
- GLint offset)
+ GLint offset,
+ GLuint reg_size )
{
struct brw_compile *p = &c->func;
- struct brw_reg tmp = vec4(get_tmp(c));
+ struct brw_reg tmp = get_tmp(c);
struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
- struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
- GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_D);
+ GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * reg_size;
struct brw_reg indirect = brw_vec4_indirect(0,0);
+ struct brw_reg acc = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UW);
+
+ /* Set the vertical stride on the register access so that the first
+ * 4 components come from a0.0 and the second 4 from a0.1.
+ */
+ indirect.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
{
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
- /* This is pretty clunky - load the address register twice and
- * fetch each 4-dword value in turn. There must be a way to do
- * this in a single pass, but I couldn't get it to work.
- */
- brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
- brw_MOV(p, tmp, indirect);
+ brw_MUL(p, acc, vp_address, brw_imm_uw(reg_size));
+ brw_ADD(p, brw_address_reg(0), acc, brw_imm_uw(byte_offset));
+
+ brw_MUL(p, acc, suboffset(vp_address, 4), brw_imm_uw(reg_size));
+ brw_ADD(p, brw_address_reg(1), acc, brw_imm_uw(byte_offset));
- brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
- brw_MOV(p, suboffset(tmp, 4), indirect);
+ brw_MOV(p, tmp, indirect);
brw_pop_insn_state(p);
}
-
+
/* NOTE: tmp not released */
- return vec8(tmp);
+ return tmp;
}
+static void
+move_to_reladdr_dst(struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ struct brw_reg val)
+{
+ struct brw_compile *p = &c->func;
+ int reg_size = 32;
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_D);
+ struct brw_reg temp_base = c->regs[inst->DstReg.File][0];
+ GLuint byte_offset = temp_base.nr * 32 + temp_base.subnr;
+ struct brw_reg indirect = brw_vec4_indirect(0,0);
+ struct brw_reg acc = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UW);
+
+ byte_offset += inst->DstReg.Index * reg_size;
+
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ brw_MUL(p, acc, vp_address, brw_imm_uw(reg_size));
+ brw_ADD(p, brw_address_reg(0), acc, brw_imm_uw(byte_offset));
+ brw_MOV(p, indirect, val);
+
+ brw_MUL(p, acc, suboffset(vp_address, 4), brw_imm_uw(reg_size));
+ brw_ADD(p, brw_address_reg(0), acc,
+ brw_imm_uw(byte_offset + reg_size / 2));
+ brw_MOV(p, indirect, suboffset(val, 4));
+
+ brw_pop_insn_state(p);
+}
/**
* Get brw reg corresponding to the instruction's [argIndex] src reg.
@@ -1091,7 +1116,7 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_INPUT:
case PROGRAM_OUTPUT:
if (relAddr) {
- return deref(c, c->regs[file][0], index);
+ return deref(c, c->regs[file][0], index, 32);
}
else {
assert(c->regs[file][index].nr != 0);
@@ -1113,7 +1138,7 @@ get_src_reg( struct brw_vs_compile *c,
return get_constant(c, inst, argIndex);
}
else if (relAddr) {
- return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16);
}
else {
assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
@@ -1134,26 +1159,6 @@ get_src_reg( struct brw_vs_compile *c,
}
}
-
-static void emit_arl( struct brw_vs_compile *c,
- struct brw_reg dst,
- struct brw_reg arg0 )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp = dst;
- GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
-
- if (need_tmp)
- tmp = get_tmp(c);
-
- brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */
- brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
-
- if (need_tmp)
- release_tmp(c, tmp);
-}
-
-
/**
* Return the brw reg for the given instruction's src argument.
* Will return mangled results for SWZ op. The emit_swz() function
@@ -1198,8 +1203,17 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
switch (dst.File) {
case PROGRAM_TEMPORARY:
case PROGRAM_OUTPUT:
- assert(c->regs[dst.File][dst.Index].nr != 0);
- reg = c->regs[dst.File][dst.Index];
+ /* register-indirect addressing is only 1x1, not VxH, for
+ * destination regs. So, for RelAddr we'll return a temporary
+ * for the dest and do a move of the result to the RelAddr
+ * register after the instruction emit.
+ */
+ if (dst.RelAddr) {
+ reg = get_tmp(c);
+ } else {
+ assert(c->regs[dst.File][dst.Index].nr != 0);
+ reg = c->regs[dst.File][dst.Index];
+ }
break;
case PROGRAM_ADDRESS:
assert(dst.Index == 0);
@@ -1298,7 +1312,6 @@ static void emit_vertex_write( struct brw_vs_compile *c)
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
- struct brw_reg m0 = brw_message_reg(0);
struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
int eot;
@@ -1381,16 +1394,19 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
brw_set_access_mode(p, BRW_ALIGN_1);
+ /* The VUE layout is documented in Volume 2a. */
if (intel->gen >= 6) {
- /* There are 16 DWs (D0-D15) in VUE header on Sandybridge:
+ /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
* dword 0-3 (m1) of the header is indices, point width, clip flags.
* dword 4-7 (m2) is the 4D space position
- * dword 8-15 (m3,m4) of the vertex header is the user clip distance.
- * m5 is the first vertex data we fill, which is the vertex position.
+ * dword 8-15 (m3,m4) of the vertex header is the user clip distance if
+ * enabled. We don't use it, so skip it.
+ * m3 is the first vertex element data we fill, which is the vertex
+ * position.
*/
- brw_MOV(p, offset(m0, 2), pos);
- brw_MOV(p, offset(m0, 5), pos);
- len_vertex_header = 4;
+ brw_MOV(p, brw_message_reg(2), pos);
+ brw_MOV(p, brw_message_reg(3), pos);
+ len_vertex_header = 2;
} else if (intel->gen == 5) {
/* There are 20 DWs (D0-D19) in VUE header on Ironlake:
* dword 0-3 (m1) of the header is indices, point width, clip flags.
@@ -1400,9 +1416,9 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* m6 is a pad so that the vertex element data is aligned
* m7 is the first vertex data we fill, which is the vertex position.
*/
- brw_MOV(p, offset(m0, 2), ndc);
- brw_MOV(p, offset(m0, 3), pos);
- brw_MOV(p, offset(m0, 7), pos);
+ brw_MOV(p, brw_message_reg(2), ndc);
+ brw_MOV(p, brw_message_reg(3), pos);
+ brw_MOV(p, brw_message_reg(7), pos);
len_vertex_header = 6;
} else {
/* There are 8 dwords in VUE header pre-Ironlake:
@@ -1412,8 +1428,8 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* dword 8-11 (m3) is the first vertex data, which we always make the
* vertex position.
*/
- brw_MOV(p, offset(m0, 2), ndc);
- brw_MOV(p, offset(m0, 3), pos);
+ brw_MOV(p, brw_message_reg(2), ndc);
+ brw_MOV(p, brw_message_reg(3), pos);
len_vertex_header = 2;
}
@@ -1437,29 +1453,26 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Move the overflowed attributes from the GRF to the MRF and
* issue another brw_urb_WRITE().
*/
- /* XXX I'm not 100% sure about which MRF regs to use here. Starting
- * at mrf[4] atm...
- */
- GLuint i, mrf = 0;
+ GLuint i, mrf = 1;
for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
/* move from GRF to MRF */
- brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+ brw_MOV(p, brw_message_reg(mrf), c->regs[PROGRAM_OUTPUT][i]);
mrf++;
}
}
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
- 4, /* starting mrf reg nr */
+ 0, /* starting mrf reg nr */
c->r0, /* src */
0, /* allocate */
1, /* used */
- mrf+1, /* msg len */
+ mrf, /* msg len */
0, /* response len */
1, /* eot */
1, /* writes complete */
- BRW_MAX_MRF-1, /* urb destination offset */
+ 14 / 2, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
}
}
@@ -1665,7 +1678,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_ARL:
- emit_arl(c, dst, args[0]);
+ brw_RNDD(p, dst, args[0]);
break;
case OPCODE_FLR:
brw_RNDD(p, dst, args[0]);
@@ -1890,6 +1903,14 @@ void brw_vs_emit(struct brw_vs_compile *c )
}
}
+ if (inst->DstReg.RelAddr && inst->DstReg.File == PROGRAM_TEMPORARY) {
+ /* We don't do RelAddr of PROGRAM_OUTPUT yet, because of the
+ * compute-to-mrf and the fact that we are allocating
+ * registers for only the used PROGRAM_OUTPUTs.
+ */
+ move_to_reladdr_dst(c, inst, dst);
+ }
+
release_tmps(c);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index be9e415cb0..0250a68d29 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -31,7 +31,7 @@
#include "main/mtypes.h"
#include "main/texstore.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_parameter.h"
#include "brw_context.h"
#include "brw_state.h"
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 197b875434..40f51c21c9 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -34,7 +34,7 @@
#define BRW_WM_H
-#include "shader/prog_instruction.h"
+#include "program/prog_instruction.h"
#include "brw_context.h"
#include "brw_eu.h"
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index a90a2d3cf2..0c625a4cd0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1326,7 +1326,7 @@ void emit_fb_write(struct brw_wm_compile *c,
* + 1 for the second half we get destination + 4.
*/
brw_MOV(p,
- brw_message_reg(nr + channel + (1 << 7)),
+ brw_message_reg(nr + channel + BRW_MRF_COMPR4),
arg0[channel]);
} else {
/* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
@@ -1763,12 +1763,20 @@ void brw_wm_emit( struct brw_wm_compile *c )
inst->dst[i]->spill_slot);
}
+ /* Only properly tested on ILK */
+ if (p->brw->intel.gen == 5) {
+ brw_remove_duplicate_mrf_moves(p);
+ if (c->dispatch_width == 16)
+ brw_remove_grf_to_mrf_moves(p);
+ }
+
if (INTEL_DEBUG & DEBUG_WM) {
int i;
- printf("wm-native:\n");
- for (i = 0; i < p->nr_insn; i++)
+ printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
brw_disasm(stderr, &p->store[i], p->brw->intel.gen);
printf("\n");
}
}
+
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index d73c391582..0bef874b88 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -37,9 +37,9 @@
#include "brw_wm.h"
#include "brw_util.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_statevars.h"
/** An invalid texture target */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 57be08a8d1..2dd346d6dd 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -1,7 +1,7 @@
#include "main/macros.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_optimize.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_optimize.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 60bd92ed22..05de85a957 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -32,7 +32,7 @@
#include "brw_context.h"
#include "brw_wm.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_parameter.h"
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 1789b21451..c1cf4db1ca 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -222,7 +222,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),
brw->wm.scratch_bo,
wm.thread2.per_thread_scratch_space,
- 0, 0);
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
/* Emit sampler state relocation */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 77898dbbe7..17b016b569 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -32,7 +32,7 @@
#include "main/mtypes.h"
#include "main/texstore.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_parameter.h"
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 51940efb44..6820ca3abf 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -69,7 +69,7 @@ upload_sf_state(struct brw_context *brw)
dw1 =
num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
(num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
- 3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+ 1 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
GEN6_SF_STATISTICS_ENABLE;
dw3 = 0;
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 5916a13994..4080a9dedf 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -29,8 +29,8 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
static void
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index ed1a72f03b..863c85449d 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -29,8 +29,8 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
static void