summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_cc.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip_line.c 66
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip_tri.c 76
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 49
-rw-r--r-- src/mesa/drivers/dri/i965/brw_curbe.c 56
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw.c 77
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.c 123
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h 27
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 61
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fallback.c 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_misc_state.c 37
-rw-r--r-- src/mesa/drivers/dri/i965/brw_program.c 31
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf.c 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf_state.c 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_batch.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_dump.c 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_structs.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_constval.c 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_emit.c 80
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vtbl.c 43
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.c 115
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.h 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_emit.c 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_fp.c 195
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_glsl.c 365
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_pass0.c 12
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_pass1.c 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c 22
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_state.c 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c 26
33 files changed, 1040 insertions, 491 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index 82370162f5..c724218cf5 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -88,7 +88,7 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
memset(key, 0, sizeof(*key));
- key->stencil = ctx->Stencil.Enabled;
+ key->stencil = ctx->Stencil._Enabled;
key->stencil_two_side = ctx->Stencil._TestTwoSide;
if (key->stencil) {
diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
index c45d48dff8..d830e49e50 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -181,34 +181,54 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
is_negative = brw_IF(p, BRW_EXECUTE_1);
{
- brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
- brw_math_invert(p, c->reg.t, c->reg.t);
- brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
-
- brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
- brw_MOV(p, c->reg.t1, c->reg.t);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ /*
+ * Both can be negative on GM965/G965 due to the RHW workaround;
+ * if so, this object should be rejected.
+ */
+ if (!BRW_IS_G4X(p->brw)) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_neg2);
+ }
+
+ brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+ brw_MOV(p, c->reg.t1, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
is_negative = brw_ELSE(p, is_negative);
{
- /* Coming back in. We know that both cannot be negative
- * because the line would have been culled in that case.
- */
+ /* Coming back in. We know that both cannot be negative
+ * because the line would have been culled in that case.
+ */
+
+ /* If both are positive, do nothing */
+ /* Only on GM965/G965 */
+ if (!BRW_IS_G4X(p->brw)) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ }
- /* If both are positive, do nothing */
- brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
- is_neg2 = brw_IF(p, BRW_EXECUTE_1);
{
- brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
- brw_math_invert(p, c->reg.t, c->reg.t);
- brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
-
- brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
- brw_MOV(p, c->reg.t0, c->reg.t);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- }
- brw_ENDIF(p, is_neg2);
- }
+ brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+ brw_MOV(p, c->reg.t0, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ if (!BRW_IS_G4X(p->brw)) {
+ brw_ENDIF(p, is_neg2);
+ }
+ }
brw_ENDIF(p, is_negative);
}
brw_ENDIF(p, plane_active);
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index 1dbba37fe7..7fd37bd05f 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -455,6 +455,8 @@ static void brw_clip_test( struct brw_clip_compile *c )
struct brw_indirect vt2 = brw_indirect(2, 0);
struct brw_compile *p = &c->func;
+ struct brw_instruction *is_outside;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
@@ -462,53 +464,87 @@ static void brw_clip_test( struct brw_clip_compile *c )
brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
+ brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
/* test nearz, xmin, ymin plane */
- brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3));
+ /* clip.xyz < -clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3));
+ brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3));
+ brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside, need to clip */
brw_XOR(p, t, t1, t2);
brw_XOR(p, t1, t2, t3);
brw_OR(p, t, t, t1);
-
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
- get_element(t, 0), brw_imm_ud(0));
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
- get_element(t, 1), brw_imm_ud(0));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
- get_element(t, 2), brw_imm_ud(0));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
/* test farz, xmax, ymax plane */
- brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3));
+ /* clip.xyz > clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3));
+ brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3));
+ brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside, need to clip */
brw_XOR(p, t, t1, t2);
brw_XOR(p, t1, t2, t3);
brw_OR(p, t, t, t1);
-
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
- get_element(t, 0), brw_imm_ud(0));
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
- get_element(t, 1), brw_imm_ud(0));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
- get_element(t, 2), brw_imm_ud(0));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index eaac6224f6..d96ff29310 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -121,6 +121,9 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
/* if conformance mode is set, swrast can handle any size AA point */
ctx->Const.MaxPointSizeAA = 255.0;
+ /* We want the GLSL compiler to emit code that uses condition codes */
+ ctx->Shader.EmitCondCodes = GL_TRUE;
+
/* ctx->Const.MaxNativeVertexProgramTemps = 32; */
brw_init_state( brw );
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index df90c2027f..48ed4325be 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -46,7 +46,7 @@
*
* CURBE - constant URB entry. An urb region (entry) used to hold
* constant values which the fixed function units can be instructed to
- * preload into the GRF when spawining a thread.
+ * preload into the GRF when spawning a thread.
*
* VUE - vertex URB entry. An urb entry holding a vertex and usually
* a vertex header. The header contains control information and
@@ -63,7 +63,7 @@
* special and may be overwritten.
*
* MRF - message register file. Threads communicate (and terminate)
- * by sending messages. Message parameters are placed in contigous
+ * by sending messages. Message parameters are placed in contiguous
* MRF registers. All program output is via these messages. URB
* entries are populated by sending a message to the shared URB
* function containing the new data, together with a control word,
@@ -154,21 +154,22 @@ struct brw_state_flags {
GLuint cache;
};
+
+/** Subclass of Mesa vertex program */
struct brw_vertex_program {
struct gl_vertex_program program;
GLuint id;
};
-
+/** Subclass of Mesa fragment program */
struct brw_fragment_program {
struct gl_fragment_program program;
- GLuint id;
+ GLuint id; /**< serial no. to identify frag progs, never re-used */
+ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
};
-
-
/* Data about a particular attempt to compile a program. Note that
* there can be many of these, each in a different GL state
* corresponding to a different brw_wm_prog_key struct, with different
@@ -418,8 +419,8 @@ struct brw_context
struct brw_tracked_state **atoms;
GLuint nr_atoms;
- GLuint nr_draw_regions;
- struct intel_region *draw_regions[MAX_DRAW_BUFFERS];
+ GLuint nr_color_regions;
+ struct intel_region *color_regions[MAX_DRAW_BUFFERS];
struct intel_region *depth_region;
/**
@@ -627,8 +628,6 @@ struct brw_context
* brw_vtbl.c
*/
void brwInitVtbl( struct brw_context *brw );
-void brw_do_flush( struct brw_context *brw,
- GLuint flags );
/*======================================================================
* brw_context.c
@@ -670,7 +669,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions );
*/
void brw_upload_urb_fence(struct brw_context *brw);
-void brw_upload_constant_buffer_state(struct brw_context *brw);
+/* brw_curbe.c
+ */
+void brw_upload_cs_urb_state(struct brw_context *brw);
/*======================================================================
@@ -683,6 +684,32 @@ brw_context( GLcontext *ctx )
return (struct brw_context *)ctx;
}
+static INLINE struct brw_vertex_program *
+brw_vertex_program(struct gl_vertex_program *p)
+{
+ return (struct brw_vertex_program *) p;
+}
+
+static INLINE const struct brw_vertex_program *
+brw_vertex_program_const(const struct gl_vertex_program *p)
+{
+ return (const struct brw_vertex_program *) p;
+}
+
+static INLINE struct brw_fragment_program *
+brw_fragment_program(struct gl_fragment_program *p)
+{
+ return (struct brw_fragment_program *) p;
+}
+
+static INLINE const struct brw_fragment_program *
+brw_fragment_program_const(const struct gl_fragment_program *p)
+{
+ return (const struct brw_fragment_program *) p;
+}
+
+
+
#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 4eaaa5f871..545dedd34b 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -53,7 +53,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
/* BRW_NEW_VERTEX_PROGRAM */
- struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
+ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program);
GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16;
GLuint nr_clip_regs = 0;
GLuint total_regs;
@@ -138,24 +138,24 @@ const struct brw_tracked_state brw_curbe_offsets = {
* fixed-function hardware in a double-buffering scheme to avoid a
* pipeline stall each time the contents of the curbe is changed.
*/
-void brw_upload_constant_buffer_state(struct brw_context *brw)
+void brw_upload_cs_urb_state(struct brw_context *brw)
{
- struct brw_constant_buffer_state cbs;
- memset(&cbs, 0, sizeof(cbs));
+ struct brw_cs_urb_state cs_urb;
+ memset(&cs_urb, 0, sizeof(cs_urb));
/* It appears that this is the state packet for the CS unit, ie. the
* urb entries detailed here are housed in the CS range from the
* URB_FENCE command.
*/
- cbs.header.opcode = CMD_CONST_BUFFER_STATE;
- cbs.header.length = sizeof(cbs)/4 - 2;
+ cs_urb.header.opcode = CMD_CS_URB_STATE;
+ cs_urb.header.length = sizeof(cs_urb)/4 - 2;
/* BRW_NEW_URB_FENCE */
- cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
- cbs.bits0.urb_entry_size = brw->urb.csize - 1;
+ cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+ cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
assert(brw->urb.nr_cs_entries);
- BRW_CACHED_BATCH_STRUCT(brw, &cbs);
+ BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
}
static GLfloat fixed_plane[6][4] = {
@@ -174,10 +174,12 @@ static GLfloat fixed_plane[6][4] = {
static void prepare_constant_buffer(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
- struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
- struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
- GLuint sz = brw->curbe.total_size;
- GLuint bufsz = sz * 16 * sizeof(GLfloat);
+ const struct brw_vertex_program *vp =
+ brw_vertex_program_const(brw->vertex_program);
+ const struct brw_fragment_program *fp =
+ brw_fragment_program_const(brw->fragment_program);
+ const GLuint sz = brw->curbe.total_size;
+ const GLuint bufsz = sz * 16 * sizeof(GLfloat);
GLfloat *buf;
GLuint i;
@@ -189,27 +191,25 @@ static void prepare_constant_buffer(struct brw_context *brw)
brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
if (sz == 0) {
-
if (brw->curbe.last_buf) {
free(brw->curbe.last_buf);
brw->curbe.last_buf = NULL;
brw->curbe.last_bufsz = 0;
}
-
return;
}
- buf = (GLfloat *)malloc(bufsz);
-
- memset(buf, 0, bufsz);
+ buf = (GLfloat *) _mesa_calloc(bufsz);
+ /* fragment shader constants */
if (brw->curbe.wm_size) {
GLuint offset = brw->curbe.wm_start * 16;
_mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+ /* copy float constants */
for (i = 0; i < brw->wm.prog_data->nr_params; i++)
- buf[offset + i] = brw->wm.prog_data->param[i][0];
+ buf[offset + i] = *brw->wm.prog_data->param[i];
}
@@ -244,7 +244,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
}
}
-
+ /* vertex shader constants */
if (brw->curbe.vs_size) {
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = vp->program.Base.Parameters->NumParameters;
@@ -252,10 +252,11 @@ static void prepare_constant_buffer(struct brw_context *brw)
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
for (i = 0; i < nr; i++) {
- buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0];
- buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1];
- buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2];
- buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3];
+ const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
+ buf[offset + i * 4 + 0] = value[0];
+ buf[offset + i * 4 + 1] = value[1];
+ buf[offset + i * 4 + 2] = value[2];
+ buf[offset + i * 4 + 3] = value[3];
}
}
@@ -274,11 +275,14 @@ static void prepare_constant_buffer(struct brw_context *brw)
brw->curbe.last_buf &&
bufsz == brw->curbe.last_bufsz &&
memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
- free(buf);
+ /* constants have not changed */
+ _mesa_free(buf);
}
else {
+ /* constants have changed */
if (brw->curbe.last_buf)
- free(brw->curbe.last_buf);
+ _mesa_free(brw->curbe.last_buf);
+
brw->curbe.last_buf = buf;
brw->curbe.last_bufsz = bufsz;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 39c32255f8..590b064c7e 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -734,7 +734,7 @@
#define CMD_URB_FENCE 0x6000
-#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CS_URB_STATE 0x6001
#define CMD_CONST_BUFFER 0x6002
#define CMD_STATE_BASE_ADDRESS 0x6101
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 99fd587e9f..5342622a73 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -127,6 +127,7 @@ static void brw_emit_prim(struct brw_context *brw,
uint32_t hw_prim)
{
struct brw_3d_primitive prim_packet;
+ struct intel_context *intel = &brw->intel;
if (INTEL_DEBUG & DEBUG_PRIMS)
_mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
@@ -146,10 +147,27 @@ static void brw_emit_prim(struct brw_context *brw,
/* Can't wrap here, since we rely on the validated state. */
brw->no_batch_wrap = GL_TRUE;
+
+ /* If we're set to always flush, do it before and after the primitive emit.
+ * We want to catch both missed flushes that hurt instruction/state cache
+ * and missed flushes of the render cache as it heads to places other
+ * than the draw code.
+ */
+ if (intel->always_flush_cache) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH(intel->vtbl.flush_cmd());
+ ADVANCE_BATCH();
+ }
if (prim_packet.verts_per_instance) {
intel_batchbuffer_data( brw->intel.batch, &prim_packet,
sizeof(prim_packet), LOOP_CLIPRECTS);
}
+ if (intel->always_flush_cache) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH(intel->vtbl.flush_cmd());
+ ADVANCE_BATCH();
+ }
+
brw->no_batch_wrap = GL_FALSE;
}
@@ -194,9 +212,16 @@ static GLboolean check_fallbacks( struct brw_context *brw,
GLcontext *ctx = &brw->intel.ctx;
GLuint i;
- if (!brw->intel.strict_conformance)
+ /* If we don't require strict OpenGL conformance, never
+ * use fallbacks. If we're forcing fallbacks, always
+ * use fallbacks.
+ */
+ if (brw->intel.conformance_mode == 0)
return GL_FALSE;
+ if (brw->intel.conformance_mode == 2)
+ return GL_TRUE;
+
if (ctx->Polygon.SmoothFlag) {
for (i = 0; i < nr_prims; i++)
if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
@@ -220,7 +245,7 @@ static GLboolean check_fallbacks( struct brw_context *brw,
/* GS doesn't get enough information to know when to reset
* the stipple counter?!?
*/
- if (prim[i].mode == GL_LINE_LOOP)
+ if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)
return GL_TRUE;
if (prim[i].mode == GL_POLYGON &&
@@ -230,13 +255,46 @@ static GLboolean check_fallbacks( struct brw_context *brw,
}
}
-
if (ctx->Point.SmoothFlag) {
for (i = 0; i < nr_prims; i++)
if (prim[i].mode == GL_POINTS)
return GL_TRUE;
}
+
+ /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
+ * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
+ * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and
+ * we want strict conformance, force the fallback.
+ * Right now, we only do this for 1D, 2D and 3D textures.
+ */
+ {
+ int u;
+ for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
+ if (texUnit->Enabled) {
+ if (texUnit->Enabled & TEXTURE_1D_BIT) {
+ if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
+ return GL_TRUE;
+ }
+ }
+ if (texUnit->Enabled & TEXTURE_2D_BIT) {
+ if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
+ texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
+ return GL_TRUE;
+ }
+ }
+ if (texUnit->Enabled & TEXTURE_3D_BIT) {
+ if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
+ texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
+ texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
+ return GL_TRUE;
+ }
+ }
+ }
+ }
+ }
+ /* Nothing stopping us from the fast path now */
return GL_FALSE;
}
@@ -261,11 +319,18 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
if (ctx->NewState)
_mesa_update_state( ctx );
+ /* We have to validate the textures *before* checking for fallbacks;
+ * otherwise, the software fallback won't be able to rely on the
+ * texture state, the firstLevel and lastLevel fields won't be
+ * set in the intel texture object (they'll both be 0), and the
+ * software fallback will segfault if it attempts to access any
+ * texture level other than level 0.
+ */
+ brw_validate_textures( brw );
+
if (check_fallbacks(brw, prim, nr_prims))
return GL_FALSE;
- brw_validate_textures( brw );
-
/* Bind all inputs, derive varying and size information:
*/
brw_merge_inputs( brw, arrays );
@@ -346,6 +411,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
retval = GL_TRUE;
}
+ if (intel->always_flush_batch)
+ intel_batchbuffer_flush(intel->batch);
out:
UNLOCK_HARDWARE(intel);
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index b3ae4eef33..c53efba599 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -129,3 +129,126 @@ const GLuint *brw_get_program( struct brw_compile *p,
return (const GLuint *)p->store;
}
+
+
+/**
+ * Subroutine calls require special attention.
+ * Mesa instructions may be expanded into multiple hardware instructions
+ * so the prog_instruction::BranchTarget field can't be used as an index
+ * into the hardware instructions.
+ *
+ * The BranchTarget field isn't needed, however. Mesa's GLSL compiler
+ * emits CAL and BGNSUB instructions with labels that can be used to map
+ * subroutine calls to actual subroutine code blocks.
+ *
+ * The structures and functions here implement patching of CAL instructions
+ * so they jump to the right subroutine code...
+ */
+
+
+/**
+ * For each OPCODE_BGNSUB we create one of these.
+ */
+struct brw_glsl_label
+{
+ const char *name; /**< the label string */
+ GLuint position; /**< the position of the brw instruction for this label */
+ struct brw_glsl_label *next; /**< next in linked list */
+};
+
+
+/**
+ * For each OPCODE_CAL we create one of these.
+ */
+struct brw_glsl_call
+{
+ GLuint call_inst_pos; /**< location of the CAL instruction */
+ const char *sub_name; /**< name of subroutine to call */
+ struct brw_glsl_call *next; /**< next in linked list */
+};
+
+
+/**
+ * Called for each OPCODE_BGNSUB.
+ */
+void
+brw_save_label(struct brw_compile *c, const char *name, GLuint position)
+{
+ struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label);
+ label->name = name;
+ label->position = position;
+ label->next = c->first_label;
+ c->first_label = label;
+}
+
+
+/**
+ * Called for each OPCODE_CAL.
+ */
+void
+brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos)
+{
+ struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call);
+ call->call_inst_pos = call_pos;
+ call->sub_name = name;
+ call->next = c->first_call;
+ c->first_call = call;
+}
+
+
+/**
+ * Lookup a label, return label's position/offset.
+ */
+static GLuint
+brw_lookup_label(struct brw_compile *c, const char *name)
+{
+ const struct brw_glsl_label *label;
+ for (label = c->first_label; label; label = label->next) {
+ if (strcmp(name, label->name) == 0) {
+ return label->position;
+ }
+ }
+ abort(); /* should never happen */
+ return ~0;
+}
+
+
+/**
+ * When we're done generating code, this function is called to resolve
+ * subroutine calls.
+ */
+void
+brw_resolve_cals(struct brw_compile *c)
+{
+ const struct brw_glsl_call *call;
+
+ for (call = c->first_call; call; call = call->next) {
+ const GLuint sub_loc = brw_lookup_label(c, call->sub_name);
+ struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos];
+ struct brw_instruction *brw_sub_inst = &c->store[sub_loc];
+ GLint offset = brw_sub_inst - brw_call_inst;
+
+ /* patch the CAL instruction (brw_call_inst) to jump to the subroutine (brw_sub_inst) */
+ brw_set_src1(brw_call_inst, brw_imm_d(offset * 16));
+ }
+
+ /* free linked list of calls */
+ {
+ struct brw_glsl_call *call, *next;
+ for (call = c->first_call; call; call = next) {
+ next = call->next;
+ _mesa_free(call);
+ }
+ c->first_call = NULL;
+ }
+
+ /* free linked list of labels */
+ {
+ struct brw_glsl_label *label, *next;
+ for (label = c->first_label; label; label = next) {
+ next = label->next;
+ _mesa_free(label);
+ }
+ c->first_label = NULL;
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 9e2b39af9b..eb99c21711 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -91,8 +91,13 @@ struct brw_indirect {
};
+struct brw_glsl_label;
+struct brw_glsl_call;
+
+
+
#define BRW_EU_MAX_INSN_STACK 5
-#define BRW_EU_MAX_INSN 1200
+#define BRW_EU_MAX_INSN 4000
struct brw_compile {
struct brw_instruction store[BRW_EU_MAX_INSN];
@@ -106,9 +111,22 @@ struct brw_compile {
GLuint flag_value;
GLboolean single_program_flow;
struct brw_context *brw;
+
+ struct brw_glsl_label *first_label; /**< linked list of labels */
+ struct brw_glsl_call *first_call; /**< linked list of CALs */
};
+void
+brw_save_label(struct brw_compile *c, const char *name, GLuint position);
+
+void
+brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos);
+
+void
+brw_resolve_cals(struct brw_compile *c);
+
+
static INLINE int type_sz( GLuint type )
{
@@ -152,6 +170,13 @@ static INLINE struct brw_reg brw_reg( GLuint file,
GLuint writemask )
{
struct brw_reg reg;
+ if (type == BRW_GENERAL_REGISTER_FILE)
+ assert(nr < 128);
+ else if (type == BRW_MESSAGE_REGISTER_FILE)
+ assert(nr < 9);
+ else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(nr <= BRW_ARF_IP);
+
reg.type = type;
reg.file = file;
reg.nr = nr;
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 4e099b5945..6dce1ca48e 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -55,6 +55,9 @@ static void guess_execution_size( struct brw_instruction *insn,
static void brw_set_dest( struct brw_instruction *insn,
struct brw_reg dest )
{
+ if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(dest.nr < 128);
+
insn->bits1.da1.dest_reg_file = dest.file;
insn->bits1.da1.dest_reg_type = dest.type;
insn->bits1.da1.dest_address_mode = dest.address_mode;
@@ -96,10 +99,13 @@ static void brw_set_dest( struct brw_instruction *insn,
}
static void brw_set_src0( struct brw_instruction *insn,
- struct brw_reg reg )
+ struct brw_reg reg )
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+ if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(reg.nr < 128);
+
insn->bits1.da1.src0_reg_file = reg.file;
insn->bits1.da1.src0_reg_type = reg.type;
insn->bits2.da1.src0_abs = reg.abs;
@@ -169,10 +175,12 @@ static void brw_set_src0( struct brw_instruction *insn,
void brw_set_src1( struct brw_instruction *insn,
- struct brw_reg reg )
+ struct brw_reg reg )
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+ assert(reg.nr < 128);
+
insn->bits1.da1.src1_reg_file = reg.file;
insn->bits1.da1.src1_reg_type = reg.type;
insn->bits3.da1.src1_abs = reg.abs;
@@ -323,13 +331,13 @@ static void brw_set_dp_read_message( struct brw_instruction *insn,
}
static void brw_set_sampler_message(struct brw_context *brw,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint sampler,
- GLuint msg_type,
- GLuint response_length,
- GLuint msg_length,
- GLboolean eot)
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot)
{
brw_set_src1(insn, brw_imm_d(0));
@@ -407,7 +415,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p,
* Convenience routines.
*/
#define ALU1(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0) \
{ \
@@ -415,7 +423,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \
}
#define ALU2(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1) \
@@ -469,9 +477,9 @@ void brw_NOP(struct brw_compile *p)
*/
struct brw_instruction *brw_JMPI(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg src0,
- struct brw_reg src1)
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
@@ -674,7 +682,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
struct brw_instruction *brw_WHILE(struct brw_compile *p,
- struct brw_instruction *do_insn)
+ struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
@@ -931,13 +939,13 @@ void brw_dp_READ_16( struct brw_compile *p,
void brw_fb_WRITE(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- GLuint binding_table_index,
- GLuint msg_length,
- GLuint response_length,
- GLboolean eot)
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
@@ -973,8 +981,8 @@ void brw_SAMPLE(struct brw_compile *p,
{
GLboolean need_stall = 0;
- if(writemask == 0) {
-/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+ if (writemask == 0) {
+ /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
return;
}
@@ -1006,7 +1014,7 @@ void brw_SAMPLE(struct brw_compile *p,
if (newmask != writemask) {
need_stall = 1;
-/* _mesa_printf("need stall %x %x\n", newmask , writemask); */
+ /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
}
else {
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
@@ -1047,8 +1055,7 @@ void brw_SAMPLE(struct brw_compile *p,
eot);
}
- if (need_stall)
- {
+ if (need_stall) {
struct brw_reg reg = vec8(offset(dest, response_length-1));
/* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c
index e63098fdd4..299357409c 100644
--- a/src/mesa/drivers/dri/i965/brw_fallback.c
+++ b/src/mesa/drivers/dri/i965/brw_fallback.c
@@ -75,8 +75,8 @@ static GLboolean do_check_fallback(struct brw_context *brw)
/* _NEW_STENCIL
*/
- if (ctx->Stencil.Enabled &&
- !brw->intel.hw_stencil) {
+ if (ctx->Stencil._Enabled &&
+ (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) {
DBG("FALLBACK: stencil\n");
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 9dcdad7b4e..5c94a49f60 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -178,7 +178,7 @@ static void upload_psp_urb_cbs(struct brw_context *brw )
{
upload_pipelined_state_pointers(brw);
brw_upload_urb_fence(brw);
- brw_upload_constant_buffer_state(brw);
+ brw_upload_cs_urb_state(brw);
}
const struct brw_tracked_state brw_psp_urb_cbs = {
@@ -290,8 +290,21 @@ static void upload_polygon_stipple(struct brw_context *brw)
bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
bps.header.length = sizeof(bps)/4-2;
- for (i = 0; i < 32; i++)
- bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
+ /* Polygon stipple is provided in OpenGL order, i.e. bottom
+ * row first. If we're rendering to a window (i.e. the
+ * default frame buffer object, 0), then we need to invert
+ * it to match our pixel layout. But if we're rendering
+ * to an FBO (i.e. any named frame buffer object), we *don't*
+ * need to invert - we already match the layout.
+ */
+ if (ctx->DrawBuffer->Name == 0) {
+ for (i = 0; i < 32; i++)
+ bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
+ }
+ else {
+ for (i = 0; i < 32; i++)
+ bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */
+ }
BRW_CACHED_BATCH_STRUCT(brw, &bps);
}
@@ -319,8 +332,22 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
bpso.header.length = sizeof(bpso)/4-2;
- bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
- bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
+ /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
+ * we have to invert the Y axis in order to match the OpenGL
+ * pixel coordinate system, and our offset must be matched
+ * to the window position. If we're drawing to a FBO
+ * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
+ * system works just fine, and there's no window system to
+ * worry about.
+ */
+ if (brw->intel.ctx.DrawBuffer->Name == 0) {
+ bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
+ bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
+ }
+ else {
+ bpso.bits0.y_offset = 0;
+ bpso.bits0.x_offset = 0;
+ }
BRW_CACHED_BATCH_STRUCT(brw, &bpso);
}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 0c86911044..d90bd82038 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -38,6 +38,7 @@
#include "brw_context.h"
#include "brw_util.h"
+#include "brw_wm.h"
static void brwBindProgram( GLcontext *ctx,
GLenum target,
@@ -94,7 +95,6 @@ static struct gl_program *brwNewProgram( GLcontext *ctx,
static void brwDeleteProgram( GLcontext *ctx,
struct gl_program *prog )
{
-
_mesa_delete_program( ctx, prog );
}
@@ -110,30 +110,35 @@ static void brwProgramStringNotify( GLcontext *ctx,
GLenum target,
struct gl_program *prog )
{
+ struct brw_context *brw = brw_context(ctx);
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
- struct brw_context *brw = brw_context(ctx);
- struct brw_fragment_program *p = (struct brw_fragment_program *)prog;
- struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
+ struct brw_fragment_program *newFP = brw_fragment_program(fprog);
+ const struct brw_fragment_program *curFP =
+ brw_fragment_program_const(brw->fragment_program);
+
if (fprog->FogOption) {
_mesa_append_fog_code(ctx, fprog);
fprog->FogOption = GL_NONE;
}
- if (p == fp)
+ if (newFP == curFP)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
- p->id = brw->program_id++;
+ newFP->id = brw->program_id++;
+ newFP->isGLSL = brw_wm_is_glsl(fprog);
}
else if (target == GL_VERTEX_PROGRAM_ARB) {
- struct brw_context *brw = brw_context(ctx);
- struct brw_vertex_program *p = (struct brw_vertex_program *)prog;
- struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
- if (p == vp)
+ struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
+ struct brw_vertex_program *newVP = brw_vertex_program(vprog);
+ const struct brw_vertex_program *curVP =
+ brw_vertex_program_const(brw->vertex_program);
+
+ if (newVP == curVP)
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
- if (p->program.IsPositionInvariant) {
- _mesa_insert_mvp_code(ctx, &p->program);
+ if (newVP->program.IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, &newVP->program);
}
- p->id = brw->program_id++;
+ newVP->id = brw->program_id++;
/* Also tell tnl about it:
*/
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 8c1711538a..c3c85978f4 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -167,8 +167,14 @@ static void upload_sf_prog(struct brw_context *brw)
key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_POLYGON */
- if (key.do_twoside_color)
- key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW);
+ if (key.do_twoside_color) {
+ /* If we're rendering to a FBO, we have to invert the polygon
+ * face orientation, just as we invert the viewport in
+ * sf_unit_create_from_key(). ctx->DrawBuffer->Name will be
+ * nonzero if we're rendering to such an FBO.
+ */
+ key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
+ }
dri_bo_unreference(brw->sf.prog_bo);
brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index e96d5354b3..93a9686f71 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -44,6 +44,7 @@ static void upload_sf_vp(struct brw_context *brw)
struct brw_sf_viewport sfv;
GLfloat y_scale, y_bias;
const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
memset(&sfv, 0, sizeof(sfv));
@@ -58,8 +59,6 @@ static void upload_sf_vp(struct brw_context *brw)
/* _NEW_VIEWPORT */
- const GLfloat *v = ctx->Viewport._WindowMap.m;
-
sfv.viewport.m00 = v[MAT_SX];
sfv.viewport.m11 = v[MAT_SY] * y_scale;
sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index df839c5b30..81b0a45998 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -52,7 +52,6 @@ const struct brw_tracked_state brw_cc_vp;
const struct brw_tracked_state brw_check_fallback;
const struct brw_tracked_state brw_clip_prog;
const struct brw_tracked_state brw_clip_unit;
-const struct brw_tracked_state brw_constant_buffer_state;
const struct brw_tracked_state brw_constant_buffer;
const struct brw_tracked_state brw_curbe_offsets;
const struct brw_tracked_state brw_invarient_state;
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index dc87859f3f..811940edc0 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw )
{
clear_batch_cache(brw);
-/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
-
brw->state.dirty.mesa |= ~0;
brw->state.dirty.brw |= ~0;
brw->state.dirty.cache |= ~0;
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index b28c57c2bc..5d332d010c 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -162,6 +162,14 @@ static void brw_debug_prog(const char *name, dri_bo *prog)
fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
name, (unsigned int)prog->offset + i * 4 * 4,
data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
+ /* Stop at the end of the program. It'd be nice to keep track of the actual
+ * intended program size instead of guessing like this.
+ */
+ if (data[i * 4 + 0] == 0 &&
+ data[i * 4 + 1] == 0 &&
+ data[i * 4 + 2] == 0 &&
+ data[i * 4 + 3] == 0)
+ break;
}
dri_bo_unmap(prog);
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index d97ff27f0a..89e2981203 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -439,7 +439,7 @@ struct brw_urb_fence
} bits1;
};
-struct brw_constant_buffer_state /* previously brw_command_streamer */
+struct brw_cs_urb_state
{
struct header header;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
index 9977677fd7..d29eb17f8c 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -170,8 +170,8 @@ static void calc_wm_input_sizes( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
/* BRW_NEW_VERTEX_PROGRAM */
- struct brw_vertex_program *vp =
- (struct brw_vertex_program *)brw->vertex_program;
+ const struct brw_vertex_program *vp =
+ brw_vertex_program_const(brw->vertex_program);
/* BRW_NEW_INPUT_DIMENSIONS */
struct tracker t;
GLuint insn;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 24b7dc30fe..3807dff991 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -156,6 +156,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
c->prog_data.total_grf = reg;
+
+ if (INTEL_DEBUG & DEBUG_VS) {
+ _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
+ _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+ _mesa_printf("%s reg = %d\n", __FUNCTION__, reg);
+ }
}
@@ -658,7 +664,7 @@ static void emit_nrm( struct brw_vs_compile *c,
/* TODO: relative addressing!
*/
static struct brw_reg get_reg( struct brw_vs_compile *c,
- GLuint file,
+ gl_register_file file,
GLuint index )
{
@@ -954,36 +960,27 @@ static void emit_vertex_write( struct brw_vs_compile *c)
}
+/**
+ * Called after code generation to resolve subroutine calls and the
+ * END instruction.
+ * \param end_inst points to brw code for END instruction
+ * \param last_inst points to the first instruction of the vertex write code (the END jump target)
+ */
static void
-post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
+post_vs_emit( struct brw_vs_compile *c,
+ struct brw_instruction *end_inst,
+ struct brw_instruction *last_inst )
{
- GLuint nr_insns = c->vp->program.Base.NumInstructions;
- GLuint insn, target_insn;
- struct prog_instruction *inst1, *inst2;
- struct brw_instruction *brw_inst1, *brw_inst2;
- int offset;
- for (insn = 0; insn < nr_insns; insn++) {
- inst1 = &c->vp->program.Base.Instructions[insn];
- brw_inst1 = inst1->Data;
- switch (inst1->Opcode) {
- case OPCODE_CAL:
- case OPCODE_BRA:
- target_insn = inst1->BranchTarget;
- inst2 = &c->vp->program.Base.Instructions[target_insn];
- brw_inst2 = inst2->Data;
- offset = brw_inst2 - brw_inst1;
- brw_set_src1(brw_inst1, brw_imm_d(offset*16));
- break;
- case OPCODE_END:
- offset = end_inst - brw_inst1;
- brw_set_src1(brw_inst1, brw_imm_d(offset*16));
- break;
- default:
- break;
- }
- }
+ GLint offset;
+
+ brw_resolve_cals(&c->func);
+
+ /* patch up the END code to jump past subroutines, etc */
+ offset = last_inst - end_inst;
+ brw_set_src1(end_inst, brw_imm_d(offset * 16));
}
+
/* Emit the fragment program instructions here.
*/
void brw_vs_emit(struct brw_vs_compile *c )
@@ -992,7 +989,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
struct brw_compile *p = &c->func;
GLuint nr_insns = c->vp->program.Base.NumInstructions;
GLuint insn, if_insn = 0;
- struct brw_instruction *end_inst;
+ GLuint end_offset = 0;
+ struct brw_instruction *end_inst, *last_inst;
struct brw_instruction *if_inst[MAX_IFSN];
struct brw_indirect stack_index = brw_indirect(0, 0);
@@ -1035,7 +1033,6 @@ void brw_vs_emit(struct brw_vs_compile *c )
/* Get argument regs. SWZ is special and does this itself.
*/
- inst->Data = &p->store[p->nr_insn];
if (inst->Opcode != OPCODE_SWZ)
for (i = 0; i < 3; i++) {
struct prog_src_register *src = &inst->SrcReg[i];
@@ -1203,7 +1200,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
brw_set_access_mode(p, BRW_ALIGN_16);
brw_ADD(p, get_addr_reg(stack_index),
get_addr_reg(stack_index), brw_imm_d(4));
- inst->Data = &p->store[p->nr_insn];
+ brw_save_call(p, inst->Comment, p->nr_insn);
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
break;
case OPCODE_RET:
@@ -1212,14 +1209,23 @@ void brw_vs_emit(struct brw_vs_compile *c )
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
brw_set_access_mode(p, BRW_ALIGN_16);
+ break;
case OPCODE_END:
+ end_offset = p->nr_insn;
+ /* this instruction will get patched later to jump past subroutine
+ * code, etc.
+ */
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
break;
case OPCODE_PRINT:
+ /* no-op */
+ break;
case OPCODE_BGNSUB:
+ brw_save_label(p, inst->Comment, p->nr_insn);
+ break;
case OPCODE_ENDSUB:
- /* no-op instructions */
- break;
+ /* no-op */
+ break;
default:
_mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader",
inst->Opcode, inst->Opcode < MAX_OPCODE ?
@@ -1257,9 +1263,11 @@ void brw_vs_emit(struct brw_vs_compile *c )
release_tmps(c);
}
- end_inst = &p->store[p->nr_insn];
+ end_inst = &p->store[end_offset];
+ last_inst = &p->store[p->nr_insn];
+
+ /* The END instruction will be patched to jump to this code */
emit_vertex_write(c);
- post_vs_emit(c, end_inst);
- for (insn = 0; insn < nr_insns; insn++)
- c->vp->program.Base.Instructions[insn].Data = NULL;
+
+ post_vs_emit(c, end_inst, last_inst);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index b501a59ccd..960bbb311e 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -67,11 +67,13 @@ static void brw_destroy_context( struct intel_context *intel )
brw_destroy_state(brw);
brw_draw_destroy( brw );
+ _mesa_free(brw->wm.compile_data);
+
brw_FrameBufferTexDestroy( brw );
- for (i = 0; i < brw->state.nr_draw_regions; i++)
- intel_region_release(&brw->state.draw_regions[i]);
- brw->state.nr_draw_regions = 0;
+ for (i = 0; i < brw->state.nr_color_regions; i++)
+ intel_region_release(&brw->state.color_regions[i]);
+ brw->state.nr_color_regions = 0;
intel_region_release(&brw->state.depth_region);
dri_bo_release(&brw->curbe.curbe_bo);
@@ -90,6 +92,7 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->wm.bind_bo);
for (i = 0; i < BRW_WM_MAX_SURF; i++)
dri_bo_release(&brw->wm.surf_bo[i]);
+ dri_bo_release(&brw->wm.sampler_bo);
dri_bo_release(&brw->wm.prog_bo);
dri_bo_release(&brw->wm.state_bo);
dri_bo_release(&brw->cc.prog_bo);
@@ -102,25 +105,25 @@ static void brw_destroy_context( struct intel_context *intel )
* called from intelDrawBuffer()
*/
static void brw_set_draw_region( struct intel_context *intel,
- struct intel_region *draw_regions[],
+ struct intel_region *color_regions[],
struct intel_region *depth_region,
- GLuint num_regions)
+ GLuint num_color_regions)
{
struct brw_context *brw = brw_context(&intel->ctx);
- int i;
+ GLuint i;
/* release old color/depth regions */
if (brw->state.depth_region != depth_region)
brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
- for (i = 0; i < brw->state.nr_draw_regions; i++)
- intel_region_release(&brw->state.draw_regions[i]);
+ for (i = 0; i < brw->state.nr_color_regions; i++)
+ intel_region_release(&brw->state.color_regions[i]);
intel_region_release(&brw->state.depth_region);
/* reference new color/depth regions */
- for (i = 0; i < num_regions; i++)
- intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]);
+ for (i = 0; i < num_color_regions; i++)
+ intel_region_reference(&brw->state.color_regions[i], color_regions[i]);
intel_region_reference(&brw->state.depth_region, depth_region);
- brw->state.nr_draw_regions = num_regions;
+ brw->state.nr_color_regions = num_color_regions;
}
@@ -181,23 +184,6 @@ static void brw_note_unlock( struct intel_context *intel )
}
-void brw_do_flush( struct brw_context *brw, GLuint flags )
-{
- struct brw_mi_flush flush;
- memset(&flush, 0, sizeof(flush));
- flush.opcode = CMD_MI_FLUSH;
- flush.flags = flags;
- BRW_BATCH_STRUCT(brw, &flush);
-}
-
-
-static void brw_emit_flush( struct intel_context *intel, GLuint unused )
-{
- brw_do_flush(brw_context(&intel->ctx),
- BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE);
-}
-
-
/* called from intelWaitForIdle() and intelFlush()
*
* For now, just flush everything. Could be smarter later.
@@ -234,6 +220,5 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.destroy = brw_destroy_context;
brw->intel.vtbl.set_draw_region = brw_set_draw_region;
brw->intel.vtbl.flush_cmd = brw_flush_cmd;
- brw->intel.vtbl.emit_flush = brw_emit_flush;
brw->intel.vtbl.debug_batch = brw_debug_batch;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index ea708a0681..1645ca0b70 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -80,6 +80,53 @@ GLuint brw_wm_is_scalar_result( GLuint opcode )
}
+/**
+ * Do GPU code generation for a non-GLSL shader. Non-GLSL shaders have
+ * no flow control instructions so we can more readily do SSA-style
+ * optimizations.
+ */
+static void
+brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ /* Augment fragment program. Add instructions for pre- and
+ * post-fragment-program tasks such as interpolation and fogging.
+ */
+ brw_wm_pass_fp(c);
+
+ /* Translate to intermediate representation. Build register usage
+ * chains.
+ */
+ brw_wm_pass0(c);
+
+ /* Dead code removal.
+ */
+ brw_wm_pass1(c);
+
+ /* Register allocation.
+ */
+ c->grf_limit = BRW_WM_MAX_GRF / 2;
+
+ brw_wm_pass2(c);
+
+ c->prog_data.total_grf = c->max_wm_grf;
+ if (c->last_scratch) {
+ c->prog_data.total_scratch = c->last_scratch + 0x40;
+ }
+ else {
+ c->prog_data.total_scratch = 0;
+ }
+
+ /* Emit GEN4 code.
+ */
+ brw_wm_emit(c);
+}
+
+
+/**
+ * All Mesa program -> GPU code generation goes through this function.
+ * Depending on the instructions used (i.e. flow control instructions)
+ * we'll use one of two code generators.
+ */
static void do_wm_prog( struct brw_context *brw,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
@@ -90,52 +137,32 @@ static void do_wm_prog( struct brw_context *brw,
c = brw->wm.compile_data;
if (c == NULL) {
- brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
- c = brw->wm.compile_data;
+ brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
+ c = brw->wm.compile_data;
} else {
- memset(c, 0, sizeof(*brw->wm.compile_data));
+ memset(c, 0, sizeof(*brw->wm.compile_data));
}
memcpy(&c->key, key, sizeof(*key));
c->fp = fp;
c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
- brw_init_compile(brw, &c->func);
- if (brw_wm_is_glsl(&c->fp->program)) {
- brw_wm_glsl_emit(brw, c);
- } else {
- /* Augment fragment program. Add instructions for pre- and
- * post-fragment-program tasks such as interpolation and fogging.
- */
- brw_wm_pass_fp(c);
-
- /* Translate to intermediate representation. Build register usage
- * chains.
- */
- brw_wm_pass0(c);
-
- /* Dead code removal.
- */
- brw_wm_pass1(c);
-
- /* Register allocation.
- */
- c->grf_limit = BRW_WM_MAX_GRF/2;
-
- brw_wm_pass2(c);
-
- c->prog_data.total_grf = c->max_wm_grf;
- if (c->last_scratch) {
- c->prog_data.total_scratch =
- c->last_scratch + 0x40;
- } else {
- c->prog_data.total_scratch = 0;
- }
-
- /* Emit GEN4 code.
- */
- brw_wm_emit(c);
+ brw_init_compile(brw, &c->func);
+
+ /* temporary sanity check assertion */
+ ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+
+ /*
+ * Shaders which use GLSL features such as flow control are handled
+ * differently from "simple" shaders.
+ */
+ if (fp->isGLSL) {
+ brw_wm_glsl_emit(brw, c);
}
+ else {
+ brw_wm_non_glsl_emit(brw, c);
+ }
+
if (INTEL_DEBUG & DEBUG_WM)
fprintf(stderr, "\n");
@@ -159,7 +186,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
{
GLcontext *ctx = &brw->intel.ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
- struct brw_fragment_program *fp =
+ const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
GLuint lookup = 0;
GLuint line_aa;
@@ -174,7 +201,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
ctx->Color.AlphaEnabled)
lookup |= IZ_PS_KILL_ALPHATEST_BIT;
- if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))
+ if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH))
lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
/* _NEW_DEPTH */
@@ -186,7 +213,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
/* _NEW_STENCIL */
- if (ctx->Stencil.Enabled) {
+ if (ctx->Stencil._Enabled) {
lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
if (ctx->Stencil.WriteMask[0] ||
@@ -278,10 +305,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
key->drawable_height = brw->intel.driDrawable->h;
}
- /* Extra info:
- */
+ /* The unique fragment program ID */
key->program_string_id = fp->id;
-
}
@@ -305,8 +330,6 @@ static void brw_prepare_wm_prog(struct brw_context *brw)
}
-/* See brw_wm.c:
- */
const struct brw_tracked_state brw_wm_prog = {
.dirty = {
.mesa = (_NEW_COLOR |
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 0f46a25b1a..7f0e5702f2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -143,13 +143,12 @@ struct brw_wm_instruction {
GLuint writemask:4;
GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */
GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
+ GLuint tex_shadow:1; /* do shadow comparison? */
GLuint eot:1; /* End of thread indicator for FB_WRITE*/
GLuint target:10; /* target binding table index for FB_WRITE*/
};
-#define PROGRAM_INTERNAL_PARAM
-
#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
#define BRW_WM_MAX_GRF 128 /* hardware limit */
#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
@@ -240,13 +239,15 @@ struct brw_wm_compile {
GLuint max_wm_grf;
GLuint last_scratch;
+ /** Mapping from Mesa registers to hardware registers */
struct {
GLboolean inited;
struct brw_reg reg;
} wm_regs[PROGRAM_PAYLOAD+1][256][4];
+
struct brw_reg stack;
struct brw_reg emit_mask_reg;
- GLuint reg_index;
+ GLuint reg_index; /**< Index of next free GRF register */
GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
GLuint tmp_max;
@@ -281,4 +282,6 @@ void brw_wm_lookup_iz( GLuint line_aa,
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+
+
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index b5050a3e40..f2dca9caa6 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -671,7 +671,6 @@ static void emit_tex( struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
GLuint msgLength, responseLength;
- GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
GLuint i, nr;
GLuint emit;
@@ -693,7 +692,7 @@ static void emit_tex( struct brw_wm_compile *c,
break;
}
- if (shadow) {
+ if (inst->tex_shadow) {
nr = 4;
emit |= WRITEMASK_W;
}
@@ -718,7 +717,7 @@ static void emit_tex( struct brw_wm_compile *c,
inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
inst->tex_unit, /* sampler */
inst->writemask,
- (shadow ?
+ (inst->tex_shadow ?
BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
responseLength,
@@ -886,6 +885,9 @@ static void emit_aa( struct brw_wm_compile *c,
/* Post-fragment-program processing. Send the results to the
* framebuffer.
+ * \param arg0 the fragment color
+ * \param arg1 the pass-through depth value
+ * \param arg2 the shader-computed depth value
*/
static void emit_fb_write( struct brw_wm_compile *c,
struct brw_reg *arg0,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index ea3f3fc678..533be3858e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -129,7 +129,7 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
reg.Index = idx;
reg.WriteMask = WRITEMASK_XYZW;
reg.RelAddr = 0;
- reg.CondMask = 0;
+ reg.CondMask = COND_TR;
reg.CondSwizzle = 0;
reg.CondSrc = 0;
reg.pad = 0;
@@ -183,16 +183,16 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
{
struct prog_instruction *inst = get_fp_inst(c);
*inst = *inst0;
- inst->Data = (void *)inst0;
return inst;
}
-static struct prog_instruction * emit_op(struct brw_wm_compile *c,
+static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
GLuint op,
struct prog_dst_register dest,
GLuint saturate,
GLuint tex_src_unit,
GLuint tex_src_target,
+ GLuint tex_shadow,
struct prog_src_register src0,
struct prog_src_register src1,
struct prog_src_register src2 )
@@ -206,6 +206,7 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c,
inst->SaturateMode = saturate;
inst->TexSrcUnit = tex_src_unit;
inst->TexSrcTarget = tex_src_target;
+ inst->TexShadow = tex_shadow;
inst->SrcReg[0] = src0;
inst->SrcReg[1] = src1;
inst->SrcReg[2] = src2;
@@ -213,6 +214,20 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c,
}
+static struct prog_instruction * emit_op(struct brw_wm_compile *c,
+ GLuint op,
+ struct prog_dst_register dest,
+ GLuint saturate,
+ struct prog_src_register src0,
+ struct prog_src_register src1,
+ struct prog_src_register src2 )
+{
+ return emit_tex_op(c, op, dest, saturate,
+ 0, 0, 0, /* tex unit, target, shadow */
+ src0, src1, src2);
+}
+
+
/***********************************************************************
@@ -234,7 +249,7 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
emit_op(c,
WM_PIXELXY,
dst_mask(pixel_xy, WRITEMASK_XY),
- 0, 0, 0,
+ 0,
payload_r0_depth,
src_undef(),
src_undef());
@@ -257,7 +272,7 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
emit_op(c,
WM_DELTAXY,
dst_mask(delta_xy, WRITEMASK_XY),
- 0, 0, 0,
+ 0,
pixel_xy,
payload_r0_depth,
src_undef());
@@ -274,14 +289,13 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
struct prog_dst_register pixel_w = get_temp(c);
struct prog_src_register deltas = get_delta_xy(c);
struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
-
-
+
/* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
*/
emit_op(c,
WM_PIXELW,
dst_mask(pixel_w, WRITEMASK_W),
- 0, 0, 0,
+ 0,
interp_wpos,
deltas,
src_undef());
@@ -316,7 +330,7 @@ static void emit_interp( struct brw_wm_compile *c,
emit_op(c,
WM_WPOSXY,
dst_mask(dst, WRITEMASK_XY),
- 0, 0, 0,
+ 0,
get_pixel_xy(c),
src_undef(),
src_undef());
@@ -328,7 +342,7 @@ static void emit_interp( struct brw_wm_compile *c,
emit_op(c,
WM_LINTERP,
dst,
- 0, 0, 0,
+ 0,
interp,
deltas,
arg2);
@@ -339,7 +353,7 @@ static void emit_interp( struct brw_wm_compile *c,
emit_op(c,
WM_CINTERP,
dst,
- 0, 0, 0,
+ 0,
interp,
src_undef(),
src_undef());
@@ -348,7 +362,7 @@ static void emit_interp( struct brw_wm_compile *c,
emit_op(c,
WM_LINTERP,
dst,
- 0, 0, 0,
+ 0,
interp,
deltas,
src_undef());
@@ -358,7 +372,7 @@ static void emit_interp( struct brw_wm_compile *c,
emit_op(c,
WM_PINTERP,
dst,
- 0, 0, 0,
+ 0,
interp,
deltas,
get_pixel_w(c));
@@ -378,7 +392,7 @@ static void emit_ddx( struct brw_wm_compile *c,
emit_op(c,
OPCODE_DDX,
inst->DstReg,
- 0, 0, 0,
+ 0,
interp,
get_pixel_w(c),
src_undef());
@@ -394,7 +408,7 @@ static void emit_ddy( struct brw_wm_compile *c,
emit_op(c,
OPCODE_DDY,
inst->DstReg,
- 0, 0, 0,
+ 0,
interp,
get_pixel_w(c),
src_undef());
@@ -489,13 +503,12 @@ static void precalc_dst( struct brw_wm_compile *c,
emit_op(c,
OPCODE_MUL,
dst_mask(dst, WRITEMASK_Y),
- inst->SaturateMode, 0, 0,
+ inst->SaturateMode,
src0,
src1,
src_undef());
}
-
if (dst.WriteMask & WRITEMASK_XZ) {
struct prog_instruction *swz;
GLuint z = GET_SWZ(src0.Swizzle, Z);
@@ -505,7 +518,7 @@ static void precalc_dst( struct brw_wm_compile *c,
swz = emit_op(c,
OPCODE_SWZ,
dst_mask(dst, WRITEMASK_XZ),
- inst->SaturateMode, 0, 0,
+ inst->SaturateMode,
src_swizzle(src0, SWIZZLE_ONE, z, z, z),
src_undef(),
src_undef());
@@ -518,7 +531,7 @@ static void precalc_dst( struct brw_wm_compile *c,
emit_op(c,
OPCODE_MOV,
dst_mask(dst, WRITEMASK_W),
- inst->SaturateMode, 0, 0,
+ inst->SaturateMode,
src1,
src_undef(),
src_undef());
@@ -540,7 +553,7 @@ static void precalc_lit( struct brw_wm_compile *c,
swz = emit_op(c,
OPCODE_SWZ,
dst_mask(dst, WRITEMASK_XW),
- 0, 0, 0,
+ 0,
src_swizzle1(src0, SWIZZLE_ONE),
src_undef(),
src_undef());
@@ -548,12 +561,11 @@ static void precalc_lit( struct brw_wm_compile *c,
swz->SrcReg[0].NegateBase = 0;
}
-
if (dst.WriteMask & WRITEMASK_YZ) {
emit_op(c,
OPCODE_LIT,
dst_mask(dst, WRITEMASK_YZ),
- inst->SaturateMode, 0, 0,
+ inst->SaturateMode,
src0,
src_undef(),
src_undef());
@@ -589,7 +601,7 @@ static void precalc_tex( struct brw_wm_compile *c,
/* tmpcoord = src0 (i.e.: coord = src0) */
out = emit_op(c, OPCODE_MOV,
tmpcoord,
- 0, 0, 0,
+ 0,
src0,
src_undef(),
src_undef());
@@ -599,7 +611,7 @@ static void precalc_tex( struct brw_wm_compile *c,
/* tmp0 = MAX(coord.X, coord.Y) */
emit_op(c, OPCODE_MAX,
tmp0,
- 0, 0, 0,
+ 0,
src_swizzle1(coord, X),
src_swizzle1(coord, Y),
src_undef());
@@ -607,7 +619,7 @@ static void precalc_tex( struct brw_wm_compile *c,
/* tmp1 = MAX(tmp0, coord.Z) */
emit_op(c, OPCODE_MAX,
tmp1,
- 0, 0, 0,
+ 0,
tmp0src,
src_swizzle1(coord, Z),
src_undef());
@@ -615,7 +627,7 @@ static void precalc_tex( struct brw_wm_compile *c,
/* tmp0 = 1 / tmp1 */
emit_op(c, OPCODE_RCP,
tmp0,
- 0, 0, 0,
+ 0,
tmp1src,
src_undef(),
src_undef());
@@ -623,7 +635,7 @@ static void precalc_tex( struct brw_wm_compile *c,
/* tmpCoord = src0 * tmp0 */
emit_op(c, OPCODE_MUL,
tmpcoord,
- 0, 0, 0,
+ 0,
src0,
tmp0src,
src_undef());
@@ -646,7 +658,7 @@ static void precalc_tex( struct brw_wm_compile *c,
emit_op(c,
OPCODE_MUL,
tmpcoord,
- 0, 0, 0,
+ 0,
inst->SrcReg[0],
scale,
src_undef());
@@ -686,22 +698,23 @@ static void precalc_tex( struct brw_wm_compile *c,
/* tmp = TEX ...
*/
- emit_op(c,
- OPCODE_TEX,
- tmp,
- inst->SaturateMode,
- unit,
- inst->TexSrcTarget,
- coord,
- src_undef(),
- src_undef());
+ emit_tex_op(c,
+ OPCODE_TEX,
+ tmp,
+ inst->SaturateMode,
+ unit,
+ inst->TexSrcTarget,
+ inst->TexShadow,
+ coord,
+ src_undef(),
+ src_undef());
/* tmp.xyz = ADD TMP, C0
*/
emit_op(c,
OPCODE_ADD,
dst_mask(tmp, WRITEMASK_XYZ),
- 0, 0, 0,
+ 0,
tmpsrc,
C0,
src_undef());
@@ -712,7 +725,7 @@ static void precalc_tex( struct brw_wm_compile *c,
emit_op(c,
OPCODE_MUL,
dst_mask(tmp, WRITEMASK_Y),
- 0, 0, 0,
+ 0,
tmpsrc,
src_swizzle1(C0, W),
src_undef());
@@ -727,7 +740,7 @@ static void precalc_tex( struct brw_wm_compile *c,
emit_op(c,
OPCODE_MAD,
dst_mask(dst, WRITEMASK_XYZ),
- 0, 0, 0,
+ 0,
swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
C1,
src_swizzle1(tmpsrc, Y));
@@ -737,7 +750,7 @@ static void precalc_tex( struct brw_wm_compile *c,
emit_op(c,
OPCODE_MAD,
dst_mask(dst, WRITEMASK_Y),
- 0, 0, 0,
+ 0,
src_swizzle1(tmpsrc, Z),
src_swizzle1(C1, W),
src_swizzle1(src_reg_from_dst(dst), Y));
@@ -746,15 +759,16 @@ static void precalc_tex( struct brw_wm_compile *c,
}
else {
/* ordinary RGBA tex instruction */
- emit_op(c,
- OPCODE_TEX,
- inst->DstReg,
- inst->SaturateMode,
- unit,
- inst->TexSrcTarget,
- coord,
- src_undef(),
- src_undef());
+ emit_tex_op(c,
+ OPCODE_TEX,
+ inst->DstReg,
+ inst->SaturateMode,
+ unit,
+ inst->TexSrcTarget,
+ inst->TexShadow,
+ coord,
+ src_undef(),
+ src_undef());
}
/* For GL_EXT_texture_swizzle: */
@@ -764,7 +778,6 @@ static void precalc_tex( struct brw_wm_compile *c,
emit_op(c, OPCODE_SWZ,
inst->DstReg,
SATURATE_OFF, /* saturate already done above */
- 0, 0, /* tex unit, target N/A */
src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
src_undef(),
src_undef());
@@ -813,7 +826,7 @@ static void precalc_txp( struct brw_wm_compile *c,
emit_op(c,
OPCODE_RCP,
dst_mask(tmp, WRITEMASK_W),
- 0, 0, 0,
+ 0,
src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
src_undef(),
src_undef());
@@ -823,7 +836,7 @@ static void precalc_txp( struct brw_wm_compile *c,
emit_op(c,
OPCODE_MUL,
dst_mask(tmp, WRITEMASK_XYZ),
- 0, 0, 0,
+ 0,
src0,
src_swizzle1(src_reg_from_dst(tmp), W),
src_undef());
@@ -849,42 +862,41 @@ static void precalc_txp( struct brw_wm_compile *c,
static void emit_fb_write( struct brw_wm_compile *c )
{
struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
- struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
+ struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
struct prog_src_register outcolor;
GLuint i;
struct prog_instruction *inst, *last_inst;
struct brw_context *brw = c->func.brw;
- /* inst->Sampler is not used by backend,
- use it for fb write target and eot */
-
- if (brw->state.nr_draw_regions > 1) {
- for (i = 0 ; i < brw->state.nr_draw_regions; i++) {
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
- last_inst = inst = emit_op(c,
- WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0,
- outcolor, payload_r0_depth, outdepth);
- inst->Sampler = (i<<1);
- if (c->fp_fragcolor_emitted) {
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
- last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
- 0, 0, 0, outcolor, payload_r0_depth, outdepth);
- inst->Sampler = (i<<1);
- }
- }
- last_inst->Sampler |= 1; //eot
+ /* The inst->Aux field is used for FB write target and the EOT marker */
+
+ if (brw->state.nr_color_regions > 1) {
+ for (i = 0 ; i < brw->state.nr_color_regions; i++) {
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
+ last_inst = inst = emit_op(c,
+ WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
+ outcolor, payload_r0_depth, outdepth);
+ inst->Aux = (i<<1);
+ if (c->fp_fragcolor_emitted) {
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
+ last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+ 0, outcolor, payload_r0_depth, outdepth);
+ inst->Aux = (i<<1);
+ }
+ }
+ last_inst->Aux |= 1; //eot
}
else {
/* if gl_FragData[0] is written, use it, else use gl_FragColor */
if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
else
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
- inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
- 0, 0, 0, outcolor, payload_r0_depth, outdepth);
- inst->Sampler = 1|(0<<1);
+ inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+ 0, outcolor, payload_r0_depth, outdepth);
+ inst->Aux = 1|(0<<1);
}
}
@@ -915,9 +927,9 @@ static void validate_dst_regs( struct brw_wm_compile *c,
const struct prog_instruction *inst )
{
if (inst->DstReg.File == PROGRAM_OUTPUT) {
- GLuint idx = inst->DstReg.Index;
- if (idx == FRAG_RESULT_COLR)
- c->fp_fragcolor_emitted = 1;
+ GLuint idx = inst->DstReg.Index;
+ if (idx == FRAG_RESULT_COLOR)
+ c->fp_fragcolor_emitted = 1;
}
}
@@ -937,11 +949,15 @@ static void print_insns( const struct prog_instruction *insn,
3);
}
else
- _mesa_printf("UNKNOWN\n");
-
+ _mesa_printf("965 Opcode %d\n", insn->Opcode);
}
}
+
+/**
+ * Initial pass for fragment program code generation.
+ * This function is used by both the GLSL and non-GLSL paths.
+ */
void brw_wm_pass_fp( struct brw_wm_compile *c )
{
struct brw_fragment_program *fp = c->fp;
@@ -958,15 +974,19 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
c->pixel_w = src_undef();
c->nr_fp_insns = 0;
- /* Emit preamble instructions:
+ /* Emit preamble instructions. This is where special instructions such as
+ * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+ * compute shader inputs from varying vars.
*/
-
-
for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
validate_src_regs(c, inst);
validate_dst_regs(c, inst);
}
+
+ /* Loop over all instructions doing assorted simplifications and
+ * transformations.
+ */
for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
struct prog_instruction *out;
@@ -975,7 +995,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
* necessary:
*/
-
switch (inst->Opcode) {
case OPCODE_SWZ:
out = emit_insn(c, inst);
@@ -1055,9 +1074,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
}
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("pass_fp:\n");
- print_insns( c->prog_instructions, c->nr_fp_insns );
- _mesa_printf("\n");
+ _mesa_printf("pass_fp:\n");
+ print_insns( c->prog_instructions, c->nr_fp_insns );
+ _mesa_printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 8fd776ac39..4cf092226c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -8,12 +8,17 @@ enum _subroutine {
SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
};
-/* Only guess, need a flag in gl_fragment_program later */
+
+/**
+ * Determine if the given fragment program uses GLSL features such
+ * as flow conditionals, loops, subroutines.
+ * Some GLSL shaders may use these features, others might not.
+ */
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
int i;
for (i = 0; i < fp->Base.NumInstructions; i++) {
- struct prog_instruction *inst = &fp->Base.Instructions[i];
+ const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
case OPCODE_IF:
case OPCODE_TRUNC:
@@ -36,6 +41,10 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
return GL_FALSE;
}
+
+/**
+ * Record the mapping of a Mesa register to a hardware register.
+ */
static void set_reg(struct brw_wm_compile *c, int file, int index,
int component, struct brw_reg reg)
{
@@ -43,6 +52,10 @@ static void set_reg(struct brw_wm_compile *c, int file, int index,
c->wm_regs[file][index][component].inited = GL_TRUE;
}
+/**
+ * Examine instruction's write mask to find index of first component
+ * enabled for writing.
+ */
static int get_scalar_dst_index(struct prog_instruction *inst)
{
int i;
@@ -62,6 +75,10 @@ static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
return reg;
}
+/**
+ * Save current temp register info.
+ * There must be a matching call to release_tmps().
+ */
static int mark_tmps(struct brw_wm_compile *c)
{
return c->tmp_index;
@@ -77,8 +94,22 @@ static void release_tmps(struct brw_wm_compile *c, int mark)
c->tmp_index = mark;
}
+/**
+ * Convert Mesa src register to brw register.
+ *
+ * Since we're running in SOA mode each Mesa register corresponds to four
+ * hardware registers. We allocate the hardware registers as needed here.
+ *
+ * \param file register file, one of PROGRAM_x
+ * \param index register number
+ * \param component src component (X=0, Y=1, Z=2, W=3)
+ * \param nr not used?!?
+ * \param neg negate value?
+ * \param abs take absolute value?
+ */
static struct brw_reg
-get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs)
+get_reg(struct brw_wm_compile *c, int file, int index, int component,
+ int nr, GLuint neg, GLuint abs)
{
struct brw_reg reg;
switch (file) {
@@ -89,21 +120,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL
break;
case PROGRAM_UNDEFINED:
return brw_null_reg();
- default:
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ case PROGRAM_PAYLOAD:
break;
+ default:
+ _mesa_problem(NULL, "Unexpected file in get_reg()");
+ return brw_null_reg();
}
- if(c->wm_regs[file][index][component].inited)
+ /* see if we've already allocated a HW register for this Mesa register */
+ if (c->wm_regs[file][index][component].inited) {
+ /* yes, re-use */
reg = c->wm_regs[file][index][component].reg;
- else
+ }
+ else {
+ /* no, allocate new register */
reg = brw_vec8_grf(c->reg_index, 0);
+ }
- if(!c->wm_regs[file][index][component].inited) {
+ /* if this is a new register allocation, record it in the table */
+ if (!c->wm_regs[file][index][component].inited) {
set_reg(c, file, index, component, reg);
c->reg_index++;
}
- if (neg & (1<< component)) {
+ if (c->reg_index >= BRW_WM_MAX_GRF - 12) {
+ /* ran out of temporary registers! */
+#if 1
+ /* This is a big hack for now.
+ * Return bad register index, just don't hang the GPU.
+ */
+ _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index);
+ c->reg_index = BRW_WM_MAX_GRF - 13;
+#else
+ return brw_null_reg();
+#endif
+ }
+
+ if (neg & (1 << component)) {
reg = negate(reg);
}
if (abs)
@@ -111,6 +167,12 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL
return reg;
}
+
+/**
+ * Preallocate registers. This sets up the Mesa to hardware register
+ * mapping for certain registers, such as constants (uniforms/state vars)
+ * and shader inputs.
+ */
static void prealloc_reg(struct brw_wm_compile *c)
{
int i, j;
@@ -119,29 +181,42 @@ static void prealloc_reg(struct brw_wm_compile *c)
GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
for (i = 0; i < 4; i++) {
- reg = (i < c->key.nr_depth_regs)
- ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
+ if (i < c->key.nr_depth_regs)
+ reg = brw_vec8_grf(i * 2, 0);
+ else
+ reg = brw_vec8_grf(0, 0);
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
}
- c->reg_index += 2*c->key.nr_depth_regs;
+ c->reg_index += 2 * c->key.nr_depth_regs;
+
+ /* constants */
{
- int nr_params = c->fp->program.Base.Parameters->NumParameters;
- struct gl_program_parameter_list *plist =
+ const int nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const struct gl_program_parameter_list *plist =
c->fp->program.Base.Parameters;
int index = 0;
- c->prog_data.nr_params = 4*nr_params;
+
+ /* number of float constants */
+ c->prog_data.nr_params = 4 * nr_params;
+
+ /* loop over program constants (float[4]) */
for (i = 0; i < nr_params; i++) {
- for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(c->reg_index + index/8,
- index%8);
- c->prog_data.param[index] =
- &plist->ParameterValues[i][j];
- set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+ /* loop over XYZW channels */
+ for (j = 0; j < 4; j++, index++) {
+ reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
+ /* Save pointer to parameter/constant value.
+ * Constants will be copied in prepare_constant_buffer()
+ */
+ c->prog_data.param[index] = &plist->ParameterValues[i][j];
+ set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
}
}
- c->nr_creg = 2*((4*nr_params+15)/16);
+ /* number of constant regs used (each reg is float[8]) */
+ c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
c->reg_index += c->nr_creg;
}
+
+ /* fragment shader inputs */
for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
if (inputs & (1<<i)) {
nr_interp_regs++;
@@ -149,9 +224,9 @@ static void prealloc_reg(struct brw_wm_compile *c)
for (j = 0; j < 4; j++)
set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
c->reg_index += 2;
-
}
}
+
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
c->prog_data.urb_read_length = nr_interp_regs * 2;
c->prog_data.curb_read_length = c->nr_creg;
@@ -161,6 +236,10 @@ static void prealloc_reg(struct brw_wm_compile *c)
c->reg_index += 2;
}
+
+/**
+ * Convert Mesa dst register to brw register.
+ */
static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
struct prog_instruction *inst, int component, int nr)
{
@@ -168,6 +247,10 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
0, 0);
}
+
+/**
+ * Convert Mesa src register to brw register.
+ */
static struct brw_reg get_src_reg(struct brw_wm_compile *c,
struct prog_src_register *src, int index, int nr)
{
@@ -176,13 +259,15 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
src->NegateBase, src->Abs);
}
-/* Subroutines are minimal support for resusable instruction sequences.
- They are implemented as simply as possible to minimise overhead: there
- is no explicit support for communication between the caller and callee
- other than saving the return address in a temporary register, nor is
- there any automatic local storage. This implies that great care is
- required before attempting reentrancy or any kind of nested
- subroutine invocations. */
+/**
+ * Subroutines are minimal support for reusable instruction sequences.
+ * They are implemented as simply as possible to minimise overhead: there
+ * is no explicit support for communication between the caller and callee
+ * other than saving the return address in a temporary register, nor is
+ * there any automatic local storage. This implies that great care is
+ * required before attempting reentrancy or any kind of nested
+ * subroutine invocations.
+ */
static void invoke_subroutine( struct brw_wm_compile *c,
enum _subroutine subroutine,
void (*emit)( struct brw_wm_compile * ) )
@@ -319,11 +404,10 @@ static void emit_pixel_xy(struct brw_wm_compile *c,
stride(suboffset(r1_uw, 5), 2, 4, 0),
brw_imm_v(0x11001100));
}
-
}
static void emit_delta_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ struct prog_instruction *inst)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg dst0, dst1, src0, src1;
@@ -351,10 +435,8 @@ static void emit_delta_xy(struct brw_wm_compile *c,
negate(suboffset(r1,1)));
}
-
}
-
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
@@ -397,33 +479,59 @@ static void emit_fb_write(struct brw_wm_compile *c,
*/
if (c->key.aa_dest_stencil_reg)
nr += 1;
- {
- brw_push_insn_state(p);
- for (channel = 0; channel < 4; channel++) {
- src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1);
- /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
- /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
- brw_MOV(p, brw_message_reg(nr + channel), src0);
- }
- /* skip over the regs populated above: */
- nr += 8;
- brw_pop_insn_state(p);
+
+ brw_push_insn_state(p);
+ for (channel = 0; channel < 4; channel++) {
+ src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1);
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+ brw_MOV(p, brw_message_reg(nr + channel), src0);
}
+ /* skip over the regs populated above: */
+ nr += 8;
+ brw_pop_insn_state(p);
- if (c->key.source_depth_to_render_target)
- {
- if (c->key.computes_depth) {
- src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1);
- brw_MOV(p, brw_message_reg(nr), src0);
- } else {
- src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- brw_MOV(p, brw_message_reg(nr), src0);
- }
-
- nr += 2;
+ if (c->key.source_depth_to_render_target) {
+ if (c->key.computes_depth) {
+ src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+ else {
+ src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+
+ nr += 2;
+ }
+
+ if (c->key.dest_depth_reg) {
+ GLuint comp = c->key.dest_depth_reg / 2;
+ GLuint off = c->key.dest_depth_reg % 2;
+
+ assert(comp == 1);
+ assert(off == 0);
+#if 0
+ /* XXX do we need this code? comp always 1, off always 0, it seems */
+ if (off != 0) {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+ /* 2nd half? */
+ brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+ brw_pop_insn_state(p);
+ }
+ else
+#endif
+ {
+ struct brw_reg src = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src);
+ }
+ nr += 2;
}
- target = inst->Sampler >> 1;
- eot = inst->Sampler & 1;
+
+ target = inst->Aux >> 1;
+ eot = inst->Aux & 1;
fire_fb_write(c, 0, nr, target, eot);
}
@@ -465,12 +573,12 @@ static void emit_linterp(struct brw_wm_compile *c,
struct brw_reg interp[4];
struct brw_reg dst, delta0, delta1;
struct brw_reg src0;
+ GLuint nr, i;
src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- GLuint nr = src0.nr;
- int i;
+ nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -494,10 +602,10 @@ static void emit_cinterp(struct brw_wm_compile *c,
struct brw_reg interp[4];
struct brw_reg dst, src0;
+ GLuint nr, i;
src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- GLuint nr = src0.nr;
- int i;
+ nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -521,13 +629,13 @@ static void emit_pinterp(struct brw_wm_compile *c,
struct brw_reg interp[4];
struct brw_reg dst, delta0, delta1;
struct brw_reg src0, w;
+ GLuint nr, i;
src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
- GLuint nr = src0.nr;
- int i;
+ nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -627,23 +735,46 @@ static void emit_dph(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
+/**
+ * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
+ * Note that the result of the function is smeared across the dest
+ * register's X, Y, Z and W channels (subject to writemasking of course).
+ */
static void emit_math1(struct brw_wm_compile *c,
struct prog_instruction *inst, GLuint func)
{
struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
+ struct brw_reg src0, dst, tmp;
+ const int mark = mark_tmps( c );
+ int i;
+
+ tmp = alloc_tmp(c);
+ /* Get first component of source register */
src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+
+ /* tmp = func(src0) */
brw_MOV(p, brw_message_reg(2), src0);
brw_math(p,
- dst,
- func,
- (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 2,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+ tmp,
+ func,
+ (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
+
+ /* replicate tmp value across enabled dest channels */
+ for (i = 0; i < 4; i++) {
+ if (inst->DstReg.WriteMask & (1 << i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV(p, dst, tmp);
+ }
+ }
+
+ release_tmps(c, mark);
}
static void emit_rcp(struct brw_wm_compile *c,
@@ -1045,23 +1176,23 @@ static void emit_ddy(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
-static __inline struct brw_reg high_words( struct brw_reg reg )
+static INLINE struct brw_reg high_words( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
0, 8, 2 );
}
-static __inline struct brw_reg low_words( struct brw_reg reg )
+static INLINE struct brw_reg low_words( struct brw_reg reg )
{
return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
}
-static __inline struct brw_reg even_bytes( struct brw_reg reg )
+static INLINE struct brw_reg even_bytes( struct brw_reg reg )
{
return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
}
-static __inline struct brw_reg odd_bytes( struct brw_reg reg )
+static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
0, 16, 2 );
@@ -1366,9 +1497,11 @@ static void emit_noise2( struct brw_wm_compile *c,
release_tmps( c, mark );
}
-/* The three-dimensional case is much like the one- and two- versions above,
- but since the number of corners is rapidly growing we now pack 16 16-bit
- hashes into each register to extract more parallelism from the EUs. */
+/**
+ * The three-dimensional case is much like the one- and two- versions above,
+ * but since the number of corners is rapidly growing we now pack 16 16-bit
+ * hashes into each register to extract more parallelism from the EUs.
+ */
static void noise3_sub( struct brw_wm_compile *c ) {
struct brw_compile *p = &c->func;
@@ -1670,13 +1803,15 @@ static void emit_noise3( struct brw_wm_compile *c,
release_tmps( c, mark );
}
-/* For the four-dimensional case, the little micro-optimisation benefits
- we obtain by unrolling all the loops aren't worth the massive bloat it
- now causes. Instead, we loop twice around performing a similar operation
- to noise3, once for the w=0 cube and once for the w=1, with a bit more
- code to glue it all together. */
-static void noise4_sub( struct brw_wm_compile *c ) {
-
+/**
+ * For the four-dimensional case, the little micro-optimisation benefits
+ * we obtain by unrolling all the loops aren't worth the massive bloat it
+ * now causes. Instead, we loop twice around performing a similar operation
+ * to noise3, once for the w=0 cube and once for the w=1, with a bit more
+ * code to glue it all together.
+ */
+static void noise4_sub( struct brw_wm_compile *c )
+{
struct brw_compile *p = &c->func;
struct brw_reg param[ 4 ],
x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
@@ -2244,28 +2379,12 @@ static void emit_tex(struct brw_wm_compile *c,
brw_MOV(p, dst[3], brw_imm_f(1.0));
}
+/**
+ * Resolve subroutine calls after code emit is done.
+ */
static void post_wm_emit( struct brw_wm_compile *c )
{
- GLuint nr_insns = c->fp->program.Base.NumInstructions;
- GLuint insn, target_insn;
- struct prog_instruction *inst1, *inst2;
- struct brw_instruction *brw_inst1, *brw_inst2;
- int offset;
- for (insn = 0; insn < nr_insns; insn++) {
- inst1 = &c->fp->program.Base.Instructions[insn];
- brw_inst1 = inst1->Data;
- switch (inst1->Opcode) {
- case OPCODE_CAL:
- target_insn = inst1->BranchTarget;
- inst2 = &c->fp->program.Base.Instructions[target_insn];
- brw_inst2 = inst2->Data;
- offset = brw_inst2 - brw_inst1;
- brw_set_src1(brw_inst1, brw_imm_d(offset*16));
- break;
- default:
- break;
- }
- }
+ brw_resolve_cals(&c->func);
}
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
@@ -2285,10 +2404,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
for (i = 0; i < c->nr_fp_insns; i++) {
struct prog_instruction *inst = &c->prog_instructions[i];
- struct prog_instruction *orig_inst;
-
- if ((orig_inst = inst->Data) != 0)
- orig_inst->Data = current_insn(p);
if (inst->CondUpdate)
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
@@ -2446,7 +2561,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_ENDIF(p, if_inst[--if_insn]);
break;
case OPCODE_BGNSUB:
+ brw_save_label(p, inst->Comment, p->nr_insn);
+ break;
case OPCODE_ENDSUB:
+ /* no-op */
break;
case OPCODE_CAL:
brw_push_insn_state(p);
@@ -2456,8 +2574,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_set_access_mode(p, BRW_ALIGN_16);
brw_ADD(p, get_addr_reg(stack_index),
get_addr_reg(stack_index), brw_imm_d(4));
- orig_inst = inst->Data;
- orig_inst->Data = &p->store[p->nr_insn];
+ brw_save_call(&c->func, inst->Comment, p->nr_insn);
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
brw_pop_insn_state(p);
break;
@@ -2510,14 +2627,34 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
post_wm_emit(c);
- for (i = 0; i < c->fp->program.Base.NumInstructions; i++)
- c->fp->program.Base.Instructions[i].Data = NULL;
+
+ if (c->reg_index >= BRW_WM_MAX_GRF) {
+ _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()");
+ /* XXX we need to do some proper error recovery here */
+ }
}
+
+/**
+ * Do GPU code generation for shaders that use GLSL features such as
+ * flow control. Other shaders will be compiled with the non-GLSL path.
+ */
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("brw_wm_glsl_emit:\n");
+ }
+
+ /* initial instruction translation/simplification */
brw_wm_pass_fp(c);
+
+ /* actual code generation */
brw_wm_emit_glsl(brw, c);
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "brw_wm_glsl_emit done");
+ }
+
c->prog_data.total_grf = c->reg_index;
c->prog_data.total_scratch = 0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 590cd946ec..2debd0678a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -334,8 +334,9 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
}
-static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
+static void
+translate_insn(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
{
struct brw_wm_instruction *out = get_instruction(c);
GLuint writemask = inst->DstReg.WriteMask;
@@ -348,8 +349,9 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c,
out->saturate = (inst->SaturateMode != SATURATE_OFF);
out->tex_unit = inst->TexSrcUnit;
out->tex_idx = inst->TexSrcTarget;
- out->eot = inst->Sampler & 1;
- out->target = inst->Sampler>>1;
+ out->tex_shadow = inst->TexShadow;
+ out->eot = inst->Aux & 1;
+ out->target = inst->Aux >> 1;
/* Args:
*/
@@ -365,8 +367,6 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c,
pass0_set_dst_scalar(c, out, inst, writemask);
else
pass0_set_dst(c, out, inst, writemask);
-
- return out;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index 6eaed8a665..cf031899dd 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -210,9 +210,10 @@ void brw_wm_pass1( struct brw_wm_compile *c )
break;
case OPCODE_TEX:
+ case OPCODE_TXP:
read0 = get_texcoord_mask(inst->tex_idx);
- if (c->key.shadowtex_mask & (1<<inst->tex_unit))
+ if (inst->tex_shadow)
read0 |= WRITEMASK_Z;
break;
@@ -267,7 +268,6 @@ void brw_wm_pass1( struct brw_wm_compile *c )
break;
case OPCODE_DST:
- case OPCODE_TXP:
default:
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index b6dac0d698..68a9296a71 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -95,6 +95,7 @@ struct wm_sampler_key {
int sampler_count;
struct wm_sampler_entry {
+ GLenum tex_target;
GLenum wrap_r, wrap_s, wrap_t;
float maxlod, minlod;
float lod_bias;
@@ -168,19 +169,20 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
}
}
- sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
- sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
- sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
-
- /* Fulsim complains if I don't do this. Hardware doesn't mind:
- */
-#if 0
- if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) {
+ if (key->tex_target == GL_TEXTURE_CUBE_MAP &&
+ (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
+ /* If we're using anything but nearest sampling for a cube map, we
+ * need to set this wrap mode to avoid GPU lock-ups.
+ */
sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
}
-#endif
+ else {
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+ sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
+ }
/* Set shadow function:
*/
@@ -234,6 +236,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
struct gl_texture_image *firstImage =
texObj->Image[0][intelObj->firstLevel];
+ entry->tex_target = texObj->Target;
+
entry->wrap_r = texObj->WrapR;
entry->wrap_s = texObj->WrapS;
entry->wrap_t = texObj->WrapT;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 3c3b3473d6..63fc8a004f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -62,6 +62,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
GLcontext *ctx = &brw->intel.ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
+ const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
struct intel_context *intel = &brw->intel;
memset(key, 0, sizeof(*key));
@@ -103,11 +104,14 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* as far as we can tell */
key->computes_depth =
- (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0;
+ (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
- key->is_glsl = brw_wm_is_glsl(fp);
+ key->is_glsl = bfp->isGLSL;
+
+ /* temporary sanity check assertion */
+ ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
/* XXX: This needs a flag to indicate when it changes. */
key->stats_wm = intel->stats_wm;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index d70f9c646c..9b320480b6 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -139,7 +139,18 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode )
return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
case MESA_FORMAT_S8_Z24:
- return BRW_SURFACEFORMAT_I24X8_UNORM;
+ /* XXX: these different surface formats don't seem to
+ * make any difference for shadow sampler/compares.
+ */
+ if (depth_mode == GL_INTENSITY)
+ return BRW_SURFACEFORMAT_I24X8_UNORM;
+ else if (depth_mode == GL_ALPHA)
+ return BRW_SURFACEFORMAT_A24X8_UNORM;
+ else
+ return BRW_SURFACEFORMAT_L24X8_UNORM;
+
+ case MESA_FORMAT_DUDV8:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
default:
assert(0);
@@ -381,8 +392,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
* a more restrictive relocation to emit.
*/
dri_bo_emit_reloc(brw->wm.surf_bo[unit],
- I915_GEM_DOMAIN_RENDER |
- I915_GEM_DOMAIN_SAMPLER,
+ I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER,
0,
offsetof(struct brw_surface_state, ss1),
@@ -447,13 +457,13 @@ static void prepare_wm_surfaces(struct brw_context *brw )
GLuint i;
int old_nr_surfaces;
- if (brw->state.nr_draw_regions > 1) {
- for (i = 0; i < brw->state.nr_draw_regions; i++) {
- brw_update_region_surface(brw, brw->state.draw_regions[i], i,
+ if (brw->state.nr_color_regions > 1) {
+ for (i = 0; i < brw->state.nr_color_regions; i++) {
+ brw_update_region_surface(brw, brw->state.color_regions[i], i,
GL_FALSE);
}
- }else {
- brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE);
+ } else {
+ brw_update_region_surface(brw, brw->state.color_regions[0], 0, GL_TRUE);
}
old_nr_surfaces = brw->wm.nr_surfaces;