summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/brw_cc.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_state.c11
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_state.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c55
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c101
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c38
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c94
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass1.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c11
13 files changed, 225 insertions, 138 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index c724218cf5..1088a7a607 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -39,12 +39,14 @@
static void prepare_cc_vp( struct brw_context *brw )
{
+ GLcontext *ctx = &brw->intel.ctx;
struct brw_cc_viewport ccv;
memset(&ccv, 0, sizeof(ccv));
- ccv.min_depth = 0.0;
- ccv.max_depth = 1.0;
+ /* _NEW_VIEWPORT */
+ ccv.min_depth = ctx->Viewport.Near;
+ ccv.max_depth = ctx->Viewport.Far;
dri_bo_unreference(brw->cc.vp_bo);
brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
@@ -52,7 +54,7 @@ static void prepare_cc_vp( struct brw_context *brw )
const struct brw_tracked_state brw_cc_vp = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_VIEWPORT,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 5762c9577c..234b3744bf 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -43,11 +43,14 @@ struct brw_clip_unit_key {
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
+
+ GLboolean depth_clamp;
};
static void
clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
{
+ GLcontext *ctx = &brw->intel.ctx;
memset(key, 0, sizeof(*key));
/* CACHE_NEW_CLIP_PROG */
@@ -62,6 +65,9 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
/* BRW_NEW_URB_FENCE */
key->nr_urb_entries = brw->urb.nr_clip_entries;
key->urb_size = brw->urb.vsize;
+
+ /* _NEW_TRANSOFORM */
+ key->depth_clamp = ctx->Transform.DepthClamp;
}
static dri_bo *
@@ -117,7 +123,8 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.clip5.userclip_enable_flags = 0x7f;
clip.clip5.userclip_must_clip = 1;
clip.clip5.guard_band_enable = 0;
- clip.clip5.viewport_z_clip_enable = 1;
+ if (!key->depth_clamp)
+ clip.clip5.viewport_z_clip_enable = 1;
clip.clip5.viewport_xy_clip_enable = 1;
clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
clip.clip5.api_mode = BRW_CLIP_API_OGL;
@@ -168,7 +175,7 @@ static void upload_clip_unit( struct brw_context *brw )
const struct brw_tracked_state brw_clip_unit = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index c53bd47bb5..44bb7bd588 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -25,13 +25,15 @@
*
**************************************************************************/
-#include <stdlib.h>
#include "main/glheader.h"
#include "main/context.h"
#include "main/state.h"
-#include "main/api_validate.h"
#include "main/enums.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo_context.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
#include "brw_draw.h"
#include "brw_defines.h"
@@ -42,11 +44,6 @@
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
-#include "tnl/tnl.h"
-#include "vbo/vbo_context.h"
-#include "swrast/swrast.h"
-#include "swrast_setup/swrast_setup.h"
-
#define FILE_DEBUG_FLAG DEBUG_BATCH
static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
@@ -145,7 +142,7 @@ static void brw_emit_prim(struct brw_context *brw,
prim_packet.start_vert_location += brw->ib.start_vertex_offset;
prim_packet.instance_count = 1;
prim_packet.start_instance_location = 0;
- prim_packet.base_vert_location = 0;
+ prim_packet.base_vert_location = prim->basevertex;
/* Can't wrap here, since we rely on the validated state. */
brw->no_batch_wrap = GL_TRUE;
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 4aa17fa02d..765ae5a2fe 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -25,9 +25,9 @@
*
**************************************************************************/
-#include <stdlib.h>
#include "main/glheader.h"
+#include "main/bufferobj.h"
#include "main/context.h"
#include "main/state.h"
#include "main/api_validate.h"
@@ -384,7 +384,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
- if (input->glarray->BufferObj->Name != 0) {
+ if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
struct intel_buffer_object *intel_buffer =
intel_buffer_object(input->glarray->BufferObj);
@@ -623,7 +623,7 @@ static void brw_prepare_indices(struct brw_context *brw)
/* Turn into a proper VBO:
*/
- if (!bufferobj->Name) {
+ if (!_mesa_is_bufferobj(bufferobj)) {
brw->ib.start_vertex_offset = 0;
/* Get new bufferobj, offset:
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c
index a761c03153..ed9d2ffe60 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
@@ -93,7 +93,10 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
gs.thread4.nr_urb_entries = key->nr_urb_entries;
gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- gs.thread4.max_threads = 0; /* Hardware requirement */
+ if (key->nr_urb_entries >= 8)
+ gs.thread4.max_threads = 1;
+ else
+ gs.thread4.max_threads = 0;
if (BRW_IS_IGDNG(brw))
gs.thread4.rendering_enable = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 108e19cdbc..1638ef8111 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1208,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
0, /* response len */
eot, /* eot */
- 1, /* writes complete */
+ eot, /* writes complete */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
@@ -1270,9 +1270,27 @@ post_vs_emit( struct brw_vs_compile *c,
}
static uint32_t
-get_predicate(uint32_t swizzle)
+get_predicate(const struct prog_instruction *inst)
{
- switch (swizzle) {
+ if (inst->DstReg.CondMask == COND_TR)
+ return BRW_PREDICATE_NONE;
+
+ /* All of GLSL only produces predicates for COND_NE and one channel per
+ * vector. Fail badly if someone starts doing something else, as it might
+ * mean infinite looping or something.
+ *
+ * We'd like to support all the condition codes, but our hardware doesn't
+ * quite match the Mesa IR, which is modeled after the NV extensions. For
+ * those, the instruction may update the condition codes or not, then any
+ * later instruction may use one of those condition codes. For gen4, the
+ * instruction may update the flags register based on one of the condition
+ * codes output by the instruction, and then further instructions may
+ * predicate on that. We can probably support this, but it won't
+ * necessarily be easy.
+ */
+ assert(inst->DstReg.CondMask == COND_NE);
+
+ switch (inst->DstReg.CondSwizzle) {
case SWIZZLE_XXXX:
return BRW_PREDICATE_ALIGN16_REPLICATE_X;
case SWIZZLE_YYYY:
@@ -1282,7 +1300,8 @@ get_predicate(uint32_t swizzle)
case SWIZZLE_WWWW:
return BRW_PREDICATE_ALIGN16_REPLICATE_W;
default:
- _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle);
+ _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n",
+ inst->DstReg.CondMask);
return BRW_PREDICATE_NORMAL;
}
}
@@ -1294,6 +1313,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
const GLuint nr_insns = c->vp->program.Base.NumInstructions;
GLuint insn, if_depth = 0, loop_depth = 0;
GLuint end_offset = 0;
@@ -1492,8 +1512,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_IF:
assert(if_depth < MAX_IF_DEPTH);
if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
- if_inst[if_depth]->header.predicate_control =
- get_predicate(inst->DstReg.CondSwizzle);
+ /* Note that brw_IF smashes the predicate_control field. */
+ if_inst[if_depth]->header.predicate_control = get_predicate(inst);
if_depth++;
break;
case OPCODE_ELSE:
@@ -1503,45 +1523,48 @@ void brw_vs_emit(struct brw_vs_compile *c )
assert(if_depth > 0);
brw_ENDIF(p, if_inst[--if_depth]);
break;
-#if 0
case OPCODE_BGNLOOP:
loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
break;
case OPCODE_BRK:
+ brw_set_predicate_control(p, get_predicate(inst));
brw_BREAK(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_CONT:
+ brw_set_predicate_control(p, get_predicate(inst));
brw_CONT(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_ENDLOOP:
{
struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
loop_depth--;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
/* patch all the BREAK/CONT instructions from last BEGINLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
if (inst0->header.opcode == BRW_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
}
else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = inst1 - inst0;
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
}
}
}
break;
-#else
- (void) loop_inst;
- (void) loop_depth;
-#endif
case OPCODE_BRA:
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_set_predicate_control(p, get_predicate(inst));
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_CAL:
brw_set_access_mode(p, BRW_ALIGN_1);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index ae98b5492d..872b1f3ecf 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -38,6 +38,8 @@
#include "brw_context.h"
#include "brw_eu.h"
+#define SATURATE (1<<5)
+
/* A big lookup table is used to figure out which and how many
* additional regs will inserted before the main payload in the WM
* program execution. These mainly relate to depth and stencil
@@ -203,7 +205,6 @@ struct brw_wm_compile {
GLuint fp_temp;
GLuint fp_interp_emitted;
GLuint fp_fragcolor_emitted;
- GLuint fp_deriv_emitted;
struct prog_src_register pixel_xy;
struct prog_src_register delta_xy;
@@ -299,5 +300,10 @@ void brw_wm_lookup_iz( GLuint line_aa,
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 268f7965c0..bf80a2942a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -34,8 +34,6 @@
#include "brw_context.h"
#include "brw_wm.h"
-#define SATURATE (1<<5)
-
/* Not quite sure how correct this is - need to understand horiz
* vs. vertical strides a little better.
*/
@@ -281,6 +279,79 @@ static void emit_frontfacing( struct brw_compile *p,
brw_set_predicate_control_flag_value(p, 0xff);
}
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ * DDX DDY
+ * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
+ * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
+ * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
+ * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
+ * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
+ * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
+ * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
+ * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other. We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0)
+{
+ int i;
+ struct brw_reg src0, src1;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+ for (i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ if (is_ddx) {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ } else {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ }
+ brw_ADD(p, dst[i], src0, negate(src1));
+ }
+ }
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
static void emit_alu1( struct brw_compile *p,
struct brw_instruction *(*func)(struct brw_compile *,
struct brw_reg,
@@ -908,6 +979,20 @@ static void emit_kil( struct brw_wm_compile *c,
}
}
+/* KIL_NV kills the pixels that are currently executing, not based on a test
+ * of the arguments.
+ */
+static void emit_kil_nv( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
+ brw_pop_insn_state(p);
+}
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
@@ -1258,6 +1343,14 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
+ case OPCODE_DDX:
+ emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+ break;
+
+ case OPCODE_DDY:
+ emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+ break;
+
case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
@@ -1387,6 +1480,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_kil(c, args[0]);
break;
+ case OPCODE_KIL_NV:
+ emit_kil_nv(c);
+ break;
+
default:
_mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 123fe841c3..4e3edfbbff 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -494,38 +494,6 @@ static void emit_interp( struct brw_wm_compile *c,
c->fp_interp_emitted |= 1<<idx;
}
-static void emit_ddx( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- GLuint idx = inst->SrcReg[0].Index;
- struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
- c->fp_deriv_emitted |= 1<<idx;
- emit_op(c,
- OPCODE_DDX,
- inst->DstReg,
- 0,
- interp,
- get_pixel_w(c),
- src_undef());
-}
-
-static void emit_ddy( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- GLuint idx = inst->SrcReg[0].Index;
- struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
- c->fp_deriv_emitted |= 1<<idx;
- emit_op(c,
- OPCODE_DDY,
- inst->DstReg,
- 0,
- interp,
- get_pixel_w(c),
- src_undef());
-}
-
/***********************************************************************
* Hacks to extend the program parameter and constant lists.
*/
@@ -1186,12 +1154,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
*/
out->DstReg.WriteMask = 0;
break;
- case OPCODE_DDX:
- emit_ddx(c, inst);
- break;
- case OPCODE_DDY:
- emit_ddy(c, inst);
- break;
case OPCODE_END:
emit_fb_write(c);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 7c210abbce..c9fe1dd8ad 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -22,6 +22,7 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
int i;
+
for (i = 0; i < fp->Base.NumInstructions; i++) {
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
@@ -31,8 +32,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
case OPCODE_CAL:
case OPCODE_BRK:
case OPCODE_RET:
- case OPCODE_DDX:
- case OPCODE_DDY:
case OPCODE_NOISE1:
case OPCODE_NOISE2:
case OPCODE_NOISE3:
@@ -293,7 +292,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
int i, j;
struct brw_reg reg;
int urb_read_length = 0;
- GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+ GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
GLuint reg_index = 0;
memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
@@ -1474,61 +1473,6 @@ static void emit_sne(struct brw_wm_compile *c,
emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
}
-static void emit_ddx(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- GLuint nr, i;
- src0 = get_src_reg(c, inst, 0, 0);
- w = get_src_reg(c, inst, 1, 3);
- nr = src0.nr;
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, interp[i]);
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_ddy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- nr = src0.nr;
- w = get_src_reg(c, inst, 1, 3);
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, suboffset(interp[i], 1));
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
-
static INLINE struct brw_reg high_words( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
@@ -2780,6 +2724,21 @@ static void post_wm_emit( struct brw_wm_compile *c )
brw_resolve_cals(&c->func);
}
+static void
+get_argument_regs(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ int index,
+ struct brw_reg *regs,
+ int mask)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1 << i))
+ regs[i] = get_src_reg(c, inst, index, i);
+ }
+}
+
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
#define MAX_IF_DEPTH 32
@@ -2797,6 +2756,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
for (i = 0; i < c->nr_fp_insns; i++) {
const struct prog_instruction *inst = &c->prog_instructions[i];
+ int dst_flags;
+ struct brw_reg args[3][4], dst[4];
+ int j;
c->cur_inst = i;
@@ -2814,6 +2776,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
else
brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ dst_flags = inst->DstReg.WriteMask;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ dst_flags |= SATURATE;
+
switch (inst->Opcode) {
case WM_PIXELXY:
emit_pixel_xy(c, inst);
@@ -2899,10 +2865,16 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_min_max(c, inst);
break;
case OPCODE_DDX:
- emit_ddx(c, inst);
- break;
case OPCODE_DDY:
- emit_ddy(c, inst);
+ for (j = 0; j < 4; j++) {
+ if (inst->DstReg.WriteMask & (1 << j))
+ dst[j] = get_dst_reg(c, inst, j);
+ else
+ dst[j] = brw_null_reg();
+ }
+ get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
+ emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+ args[0]);
break;
case OPCODE_SLT:
emit_slt(c, inst);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index 3436a24717..b449394029 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -178,6 +178,11 @@ void brw_wm_pass1( struct brw_wm_compile *c )
read1 = writemask;
break;
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ read0 = writemask;
+ break;
+
case OPCODE_MAD:
case OPCODE_CMP:
case OPCODE_LRP:
@@ -270,6 +275,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_DST:
case WM_FRONTFACING:
+ case OPCODE_KIL_NV:
default:
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 39f8c6d522..361f91292b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -107,6 +107,12 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* as far as we can tell */
key->computes_depth =
(fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
+ /* BRW_NEW_DEPTH_BUFFER
+ * Override for NULL depthbuffer case, required by the Pixel Shader Computed
+ * Depth field.
+ */
+ if (brw->state.depth_region == NULL)
+ key->computes_depth = 0;
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
@@ -300,6 +306,7 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_DEPTH_BUFFER |
BRW_NEW_NR_WM_SURFACES),
.cache = (CACHE_NEW_WM_PROG |
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 3dcc592bde..51539ac1e7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -545,15 +545,20 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
irb->texformat->MesaFormat);
}
key.tiling = region->tiling;
- key.width = region->width;
- key.height = region->height;
+ if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
+ key.width = rb->Width;
+ key.height = rb->Height;
+ } else {
+ key.width = region->width;
+ key.height = region->height;
+ }
key.pitch = region->pitch;
key.cpp = region->cpp;
key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
} else {
key.surface_type = BRW_SURFACE_NULL;
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- key.tiling = 0;
+ key.tiling = I915_TILING_X;
key.width = 1;
key.height = 1;
key.cpp = 4;