summary refs log tree commit diff
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_cc.c 17
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip.c 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip.h 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip_line.c 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip_tri.c 12
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 39
-rw-r--r-- src/mesa/drivers/dri/i965/brw_curbe.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h 16
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw.c 12
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw_upload.c 27
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fallback.c 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_gs.c 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_gs.h 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_gs_emit.c 38
-rw-r--r-- src/mesa/drivers/dri/i965/brw_misc_state.c 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf.c 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf.h 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf_emit.c 22
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf_state.c 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state.h 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_cache.c 60
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_upload.c 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_tex_layout.c 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_util.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_util.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_emit.c 85
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_state.c 37
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_surface_state.c 16
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vtbl.c 30
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.c 39
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.h 159
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_emit.c 665
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_fp.c 31
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_glsl.c 1196
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_pass0.c 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_pass2.c 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c 21
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_state.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c 31
43 files changed, 1052 insertions, 1641 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index 1088a7a607..d4ccd28c9e 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -44,17 +44,24 @@ static void prepare_cc_vp( struct brw_context *brw )
memset(&ccv, 0, sizeof(ccv));
- /* _NEW_VIEWPORT */
- ccv.min_depth = ctx->Viewport.Near;
- ccv.max_depth = ctx->Viewport.Far;
+ /* _NEW_TRANSFORM */
+ if (ctx->Transform.DepthClamp) {
+ /* _NEW_VIEWPORT */
+ ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+ ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+ } else {
+ ccv.min_depth = 0.0;
+ ccv.max_depth = 1.0;
+ }
dri_bo_unreference(brw->cc.vp_bo);
- brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
+ brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv),
+ NULL, 0);
}
const struct brw_tracked_state brw_cc_vp = {
.dirty = {
- .mesa = _NEW_VIEWPORT,
+ .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 20a927cf38..dbd10a5297 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -78,7 +78,7 @@ static void compile_clip_prog( struct brw_context *brw,
delta = REG_SIZE;
for (i = 0; i < VERT_RESULT_MAX; i++)
- if (c.key.attrs & (1<<i)) {
+ if (c.key.attrs & BITFIELD64_BIT(i)) {
c.offset[i] = delta;
delta += ATTR_SIZE;
}
@@ -156,6 +156,7 @@ static void upload_clip_prog(struct brw_context *brw)
key.attrs = brw->vs.prog_data->outputs_written;
/* _NEW_LIGHT */
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+ key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
/* _NEW_TRANSFORM */
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
index 957df441ab..1c6825510a 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.h
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -42,22 +42,21 @@
* up polygon offset and flatshading at this point:
*/
struct brw_clip_prog_key {
- GLuint attrs:32;
+ GLbitfield64 attrs;
GLuint primitive:4;
GLuint nr_userclip:3;
GLuint do_flat_shading:1;
+ GLuint pv_first:1;
GLuint do_unfilled:1;
GLuint fill_cw:2; /* includes cull information */
GLuint fill_ccw:2; /* includes cull information */
GLuint offset_cw:1;
GLuint offset_ccw:1;
- GLuint pad0:17;
-
GLuint copy_bfc_cw:1;
GLuint copy_bfc_ccw:1;
GLuint clip_mode:3;
- GLuint pad1:27;
-
+ GLuint pad0:11;
+
GLfloat offset_factor;
GLfloat offset_units;
};
diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
index 048ca620fa..fa9648f50f 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -269,8 +269,12 @@ void brw_emit_line_clip( struct brw_clip_compile *c )
brw_clip_line_alloc_regs(c);
brw_clip_init_ff_sync(c);
- if (c->key.do_flat_shading)
- brw_clip_copy_colors(c, 0, 1);
+ if (c->key.do_flat_shading) {
+ if (c->key.pv_first)
+ brw_clip_copy_colors(c, 1, 0);
+ else
+ brw_clip_copy_colors(c, 0, 1);
+ }
clip_and_emit_line(c);
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index 0efd77225e..cf79224be4 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -188,14 +188,20 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
brw_imm_ud(_3DPRIM_POLYGON));
is_poly = brw_IF(p, BRW_EXECUTE_1);
- {
+ {
brw_clip_copy_colors(c, 1, 0);
brw_clip_copy_colors(c, 2, 0);
}
is_poly = brw_ELSE(p, is_poly);
{
- brw_clip_copy_colors(c, 0, 2);
- brw_clip_copy_colors(c, 1, 2);
+ if (c->key.pv_first) {
+ brw_clip_copy_colors(c, 1, 0);
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ else {
+ brw_clip_copy_colors(c, 0, 2);
+ brw_clip_copy_colors(c, 1, 2);
+ }
}
brw_ENDIF(p, is_poly);
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index c300c33adc..8bdda60697 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -105,11 +105,13 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+ ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
ctx->Const.MaxTextureImageUnits);
ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
+ ctx->Const.MaxCombinedTextureImageUnits = 0;
/* Mesa limits textures to 4kx4k; it would be nice to fix that someday
*/
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 01b6a4a168..e73e21433c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -116,8 +116,6 @@
*/
-#define BRW_FALLBACK_DRAW (INTEL_FALLBACK_DRIVER << 0)
-
#define BRW_MAX_CURBE (32*16)
struct brw_context;
@@ -174,8 +172,8 @@ struct brw_fragment_program {
GLuint id; /**< serial no. to identify frag progs, never re-used */
GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
- dri_bo *const_buffer; /** Program constant buffer/surface */
GLboolean use_const_buffer;
+ dri_bo *const_buffer; /** Program constant buffer/surface */
/** for debugging, which texture units are referenced */
GLbitfield tex_units_used;
@@ -233,7 +231,7 @@ struct brw_vs_prog_data {
GLuint curb_read_length;
GLuint urb_read_length;
GLuint total_grf;
- GLuint outputs_written;
+ GLbitfield64 outputs_written;
GLuint nr_params; /**< number of float params/constants */
GLuint inputs_read;
@@ -254,20 +252,23 @@ struct brw_vs_ouput_sizes {
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 16
+/** Max number of render targets in a shader */
+#define BRW_MAX_DRAW_BUFFERS 4
+
/**
* Size of our surface binding table for the WM.
* This contains pointers to the drawing surfaces and current texture
* objects and shader constant buffers (+2).
*/
-#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
/**
* Helpers to convert drawing buffers, textures and constant buffers
* to surface binding table indexes, for WM.
*/
#define SURF_INDEX_DRAW(d) (d)
-#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS)
-#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t))
+#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS)
+#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t))
/**
* Size of surface binding table for the VS.
@@ -319,7 +320,6 @@ struct brw_cache_item {
GLuint nr_reloc_bufs;
dri_bo *bo;
- GLuint data_size;
struct brw_cache_item *next;
};
@@ -332,7 +332,6 @@ struct brw_cache {
struct brw_cache_item **items;
GLuint size, n_items;
- GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */
GLuint aux_size[BRW_MAX_CACHE];
char *name[BRW_MAX_CACHE];
@@ -412,23 +411,6 @@ struct brw_vertex_info {
GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
};
-
-
-
-/* Cache for TNL programs.
- */
-struct brw_tnl_cache_item {
- GLuint hash;
- void *key;
- void *data;
- struct brw_tnl_cache_item *next;
-};
-
-struct brw_tnl_cache {
- struct brw_tnl_cache_item **items;
- GLuint size, n_items;
-};
-
struct brw_query_object {
struct gl_query_object Base;
@@ -456,7 +438,6 @@ struct brw_context
GLuint primitive;
GLboolean emit_state_always;
- GLboolean no_batch_wrap;
struct {
struct brw_state_flags dirty;
@@ -760,9 +741,5 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
return (const struct brw_fragment_program *) p;
}
-
-
-#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
-
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 4be6c77aa1..aadcfbe2da 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -130,7 +130,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
const struct brw_tracked_state brw_curbe_offsets = {
.dirty = {
.mesa = _NEW_TRANSFORM,
- .brw = BRW_NEW_VERTEX_PROGRAM,
+ .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_CONTEXT,
.cache = CACHE_NEW_WM_PROG
},
.prepare = calculate_curbe_offsets
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 78d457ad2b..c19510bbd4 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -673,18 +673,10 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG 3
/* for IGDNG only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 44bb7bd588..7ad860898f 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -145,7 +145,7 @@ static void brw_emit_prim(struct brw_context *brw,
prim_packet.base_vert_location = prim->basevertex;
/* Can't wrap here, since we rely on the validated state. */
- brw->no_batch_wrap = GL_TRUE;
+ intel->no_batch_wrap = GL_TRUE;
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
@@ -153,21 +153,17 @@ static void brw_emit_prim(struct brw_context *brw,
* the besides the draw code.
*/
if (intel->always_flush_cache) {
- BEGIN_BATCH(1, IGNORE_CLIPRECTS);
- OUT_BATCH(intel->vtbl.flush_cmd());
- ADVANCE_BATCH();
+ intel_batchbuffer_emit_mi_flush(intel->batch);
}
if (prim_packet.verts_per_instance) {
intel_batchbuffer_data( brw->intel.batch, &prim_packet,
sizeof(prim_packet), LOOP_CLIPRECTS);
}
if (intel->always_flush_cache) {
- BEGIN_BATCH(1, IGNORE_CLIPRECTS);
- OUT_BATCH(intel->vtbl.flush_cmd());
- ADVANCE_BATCH();
+ intel_batchbuffer_emit_mi_flush(intel->batch);
}
- brw->no_batch_wrap = GL_FALSE;
+ intel->no_batch_wrap = GL_FALSE;
}
static void brw_merge_inputs( struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 375afadcbe..7c796dae93 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -243,14 +243,6 @@ static void wrap_buffers( struct brw_context *brw,
dri_bo_unreference(brw->vb.upload.bo);
brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
size, 1);
-
- /* Set the internal VBO\ to no-backing-store. We only use them as a
- * temporary within a brw_try_draw_prims while the lock is held.
- */
- /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH
- FAKE TO PUSH THIS STUFF */
-// if (!brw->intel.ttm)
-// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL);
}
static void get_space( struct brw_context *brw,
@@ -375,10 +367,9 @@ static void brw_prepare_vertices(struct brw_context *brw)
* isn't an issue at this point.
*/
if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
- FALLBACK(intel, BRW_FALLBACK_DRAW, GL_TRUE);
+ intel->Fallback = GL_TRUE; /* boolean, not bitfield */
return;
}
- FALLBACK(intel, BRW_FALLBACK_DRAW, GL_FALSE);
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
@@ -428,10 +419,9 @@ static void brw_prepare_vertices(struct brw_context *brw)
/* Position array not properly enabled:
*/
if (input->glarray->StrideB == 0) {
- FALLBACK(intel, BRW_FALLBACK_DRAW, GL_TRUE);
+ intel->Fallback = GL_TRUE; /* boolean, not bitfield */
return;
}
- FALLBACK(intel, BRW_FALLBACK_DRAW, GL_FALSE);
interleave = input->glarray->StrideB;
ptr = input->glarray->Ptr;
@@ -538,16 +528,9 @@ static void brw_emit_vertices(struct brw_context *brw)
I915_GEM_DOMAIN_VERTEX, 0,
input->offset);
if (BRW_IS_IGDNG(brw)) {
- if (input->stride) {
- OUT_RELOC(input->bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- input->offset + input->stride * input->count - 1);
- } else {
- assert(input->count == 1);
- OUT_RELOC(input->bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- input->offset + input->element_size - 1);
- }
+ OUT_RELOC(input->bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ input->bo->size - 1);
} else
OUT_BATCH(input->stride ? input->count : 0);
OUT_BATCH(0); /* Instance data step rate */
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 30603bdd0e..39eb88d7c2 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -170,11 +170,11 @@ static INLINE struct brw_reg brw_reg( GLuint file,
GLuint writemask )
{
struct brw_reg reg;
- if (type == BRW_GENERAL_REGISTER_FILE)
+ if (file == BRW_GENERAL_REGISTER_FILE)
assert(nr < BRW_MAX_GRF);
- else if (type == BRW_MESSAGE_REGISTER_FILE)
- assert(nr < BRW_MAX_MRF);
- else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
+ else if (file == BRW_MESSAGE_REGISTER_FILE)
+ assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+ else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_IP);
reg.type = type;
@@ -538,7 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
static INLINE struct brw_reg brw_message_reg( GLuint nr )
{
- assert(nr < BRW_MAX_MRF);
+ assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
nr,
0);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 241cdc33f8..7ceabba288 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -55,7 +55,8 @@ static void guess_execution_size( struct brw_instruction *insn,
static void brw_set_dest( struct brw_instruction *insn,
struct brw_reg dest )
{
- if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.file != BRW_MESSAGE_REGISTER_FILE)
assert(dest.nr < 128);
insn->bits1.da1.dest_reg_file = dest.file;
diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c
index d27c6c24ca..562a17844b 100644
--- a/src/mesa/drivers/dri/i965/brw_fallback.c
+++ b/src/mesa/drivers/dri/i965/brw_fallback.c
@@ -133,7 +133,11 @@ const struct brw_tracked_state brw_check_fallback = {
-/* Not used:
+/**
+ * Called by the INTEL_FALLBACK() macro.
+ * NOTE: this is a no-op for the i965 driver. The brw->intel.Fallback
+ * field is treated as a boolean, not a bitmask. It's only set in a
+ * couple of places.
*/
void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode )
{
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 48c2b9a41c..610b6c35e2 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -85,10 +85,10 @@ static void compile_gs_prog( struct brw_context *brw,
*/
switch (key->primitive) {
case GL_QUADS:
- brw_gs_quads( &c );
+ brw_gs_quads( &c, key );
break;
case GL_QUAD_STRIP:
- brw_gs_quad_strip( &c );
+ brw_gs_quad_strip( &c, key );
break;
case GL_LINE_LOOP:
brw_gs_lines( &c );
@@ -149,6 +149,7 @@ static const GLenum gs_prim[GL_POLYGON+1] = {
static void populate_key( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
+ GLcontext *ctx = &brw->intel.ctx;
memset(key, 0, sizeof(*key));
/* CACHE_NEW_VS_PROG */
@@ -158,6 +159,9 @@ static void populate_key( struct brw_context *brw,
key->primitive = gs_prim[brw->primitive];
key->hint_gs_always = 0; /* debug code? */
+
+ /* _NEW_LIGHT */
+ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
key->need_gs_prog = (key->hint_gs_always ||
brw->primitive == GL_QUADS ||
@@ -193,7 +197,7 @@ static void prepare_gs_prog(struct brw_context *brw)
const struct brw_tracked_state brw_gs_prog = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_LIGHT,
.brw = BRW_NEW_PRIMITIVE,
.cache = CACHE_NEW_VS_PROG
},
diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
index bbb991ea2e..010c1c2352 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ b/src/mesa/drivers/dri/i965/brw_gs.h
@@ -40,11 +40,12 @@
#define MAX_GS_VERTS (4)
struct brw_gs_prog_key {
- GLuint attrs:32;
+ GLbitfield64 attrs;
GLuint primitive:4;
GLuint hint_gs_always:1;
+ GLuint pv_first:1;
GLuint need_gs_prog:1;
- GLuint pad:26;
+ GLuint pad:25;
};
struct brw_gs_compile {
@@ -67,8 +68,8 @@ struct brw_gs_compile {
#define ATTR_SIZE (4*4)
-void brw_gs_quads( struct brw_gs_compile *c );
-void brw_gs_quad_strip( struct brw_gs_compile *c );
+void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
+void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
void brw_gs_tris( struct brw_gs_compile *c );
void brw_gs_lines( struct brw_gs_compile *c );
void brw_gs_points( struct brw_gs_compile *c );
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index a9b2aa2eac..0fc5b02c61 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -120,7 +120,7 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
}
-void brw_gs_quads( struct brw_gs_compile *c )
+void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
brw_gs_alloc_regs(c, 4);
@@ -128,23 +128,39 @@ void brw_gs_quads( struct brw_gs_compile *c )
* is the PV for quads, but vertex 0 for polygons:
*/
if (c->need_ff_sync)
- brw_gs_ff_sync(c, 1);
- brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
- brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
- brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
- brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ brw_gs_ff_sync(c, 1);
+ if (key->pv_first) {
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ }
+ else {
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ }
}
-void brw_gs_quad_strip( struct brw_gs_compile *c )
+void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
brw_gs_alloc_regs(c, 4);
if (c->need_ff_sync)
brw_gs_ff_sync(c, 1);
- brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
- brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
- brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
- brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ if (key->pv_first) {
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ }
+ else {
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ }
}
void brw_gs_tris( struct brw_gs_compile *c )
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index ea71857548..4b0d598336 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -66,7 +66,7 @@ static void upload_blend_constant_color(struct brw_context *brw)
const struct brw_tracked_state brw_blend_constant_color = {
.dirty = {
.mesa = _NEW_COLOR,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_blend_constant_color
@@ -93,7 +93,7 @@ static void upload_drawing_rect(struct brw_context *brw)
const struct brw_tracked_state brw_drawing_rect = {
.dirty = {
.mesa = _NEW_BUFFERS,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_drawing_rect
@@ -317,7 +317,7 @@ static void upload_polygon_stipple(struct brw_context *brw)
const struct brw_tracked_state brw_polygon_stipple = {
.dirty = {
.mesa = _NEW_POLYGONSTIPPLE,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_polygon_stipple
@@ -362,7 +362,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
const struct brw_tracked_state brw_polygon_stipple_offset = {
.dirty = {
.mesa = _NEW_WINDOW_POS,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_polygon_stipple_offset
@@ -425,7 +425,7 @@ static void upload_line_stipple(struct brw_context *brw)
const struct brw_tracked_state brw_line_stipple = {
.dirty = {
.mesa = _NEW_LINE,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_line_stipple
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index e1c2c7777b..968890f7fb 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -61,7 +61,7 @@ static void compile_sf_prog( struct brw_context *brw,
c.key = *key;
c.nr_attrs = brw_count_bits(c.key.attrs);
c.nr_attr_regs = (c.nr_attrs+1)/2;
- c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+ c.nr_setup_attrs = brw_count_bits(c.key.attrs);
c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
c.prog_data.urb_read_length = c.nr_attr_regs;
@@ -70,7 +70,7 @@ static void compile_sf_prog( struct brw_context *brw,
/* Construct map from attribute number to position in the vertex.
*/
for (i = idx = 0; i < VERT_RESULT_MAX; i++)
- if (c.key.attrs & (1<<i)) {
+ if (c.key.attrs & BITFIELD64_BIT(i)) {
c.attr_to_idx[i] = idx;
c.idx_to_attr[idx] = i;
if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
@@ -147,7 +147,7 @@ static void upload_sf_prog(struct brw_context *brw)
* edgeflag testing here, it is already done in the clip
* program.
*/
- if (key.attrs & (1<<VERT_RESULT_EDGE))
+ if (key.attrs & BITFIELD64_BIT(VERT_RESULT_EDGE))
key.primitive = SF_UNFILLED_TRIS;
else
key.primitive = SF_TRIANGLES;
@@ -161,7 +161,7 @@ static void upload_sf_prog(struct brw_context *brw)
}
key.do_point_sprite = ctx->Point.PointSprite;
- key.SpriteOrigin = ctx->Point.SpriteOrigin;
+ key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT);
/* _NEW_LIGHT */
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
index 6426b6df9f..0ba731fac9 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -45,19 +45,19 @@
#define SF_UNFILLED_TRIS 3
struct brw_sf_prog_key {
- GLuint attrs:32;
+ GLbitfield64 attrs;
GLuint primitive:2;
GLuint do_twoside_color:1;
GLuint do_flat_shading:1;
GLuint frontface_ccw:1;
GLuint do_point_sprite:1;
GLuint linear_color:1; /**< linear interp vs. perspective interp */
- GLuint pad:25;
- GLenum SpriteOrigin;
+ GLuint sprite_origin_lower_left:1;
+ GLuint pad:24;
};
struct brw_sf_point_tex {
- GLboolean CoordReplace;
+ GLboolean CoordReplace;
};
struct brw_sf_compile {
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index ca8f97f9f9..3eae41ee74 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -56,7 +56,7 @@ static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
static GLboolean have_attr(struct brw_sf_compile *c,
GLuint attr)
{
- return (c->key.attrs & (1<<attr)) ? 1 : 0;
+ return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
}
/***********************************************************************
@@ -122,8 +122,8 @@ static void do_twoside_color( struct brw_sf_compile *c )
* Flat shading
*/
-#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
- (1<<VERT_RESULT_COL1))
+#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
+ BITFIELD64_BIT(VERT_RESULT_COL1))
static void copy_colors( struct brw_sf_compile *c,
struct brw_reg dst,
@@ -312,8 +312,8 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
GLushort *pc_linear)
{
GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
- GLuint persp_mask;
- GLuint linear_mask;
+ GLbitfield64 persp_mask;
+ GLbitfield64 linear_mask;
if (c->key.do_flat_shading || c->key.linear_color)
persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS |
@@ -331,10 +331,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
*pc_linear = 0;
*pc = 0xf;
- if (persp_mask & (1 << c->idx_to_attr[reg*2]))
+ if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2]))
*pc_persp = 0xf;
- if (linear_mask & (1 << c->idx_to_attr[reg*2]))
+ if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2]))
*pc_linear = 0xf;
/* Maybe only processs one attribute on the final round:
@@ -342,10 +342,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
if (reg*2+1 < c->nr_setup_attrs) {
*pc |= 0xf0;
- if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
+ if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1]))
*pc_persp |= 0xf0;
- if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
+ if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1]))
*pc_linear |= 0xf0;
}
@@ -551,7 +551,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
- if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+ if (c->key.sprite_origin_lower_left) {
brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
@@ -570,7 +570,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
{
brw_set_predicate_control_flag_value(p, pc);
if (tex->CoordReplace) {
- if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+ if (c->key.sprite_origin_lower_left) {
brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
}
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index bc0f076073..bb69435ec0 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -93,7 +93,8 @@ static void upload_sf_vp(struct brw_context *brw)
}
dri_bo_unreference(brw->sf.vp_bo);
- brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
+ brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv),
+ NULL, 0);
}
const struct brw_tracked_state brw_sf_vp = {
@@ -113,7 +114,8 @@ struct brw_sf_unit_key {
unsigned int nr_urb_entries, urb_size, sfsize;
- GLenum front_face, cull_face, provoking_vertex;
+ GLenum front_face, cull_face;
+ unsigned pv_first:1;
unsigned scissor:1;
unsigned line_smooth:1;
unsigned point_sprite:1;
@@ -154,7 +156,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
key->point_attenuated = ctx->Point._Attenuated;
/* _NEW_LIGHT */
- key->provoking_vertex = ctx->Light.ProvokingVertex;
+ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
}
@@ -287,7 +289,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
*/
- if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
+ if (!key->pv_first) {
sf.sf7.trifan_pv = 2;
sf.sf7.linestrip_pv = 1;
sf.sf7.tristrip_pv = 2;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index d639656b9d..b129b1f1c3 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -112,6 +112,7 @@ void brw_validate_state(struct brw_context *brw);
void brw_upload_state(struct brw_context *brw);
void brw_init_state(struct brw_context *brw);
void brw_destroy_state(struct brw_context *brw);
+void brw_clear_validated_bos(struct brw_context *brw);
/***********************************************************************
* brw_state_cache.c
@@ -119,16 +120,10 @@ void brw_destroy_state(struct brw_context *brw);
dri_bo *brw_cache_data(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *data,
+ GLuint size,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs);
-dri_bo *brw_cache_data_sz(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data,
- GLuint data_size,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs);
-
dri_bo *brw_upload_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index c262e1db8b..e4c9ba7d87 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -245,7 +245,6 @@ brw_upload_cache( struct brw_cache *cache,
item->bo = bo;
dri_bo_reference(bo);
- item->data_size = data_size;
if (cache->n_items > cache->size * 1.5)
rehash(cache);
@@ -275,15 +274,22 @@ brw_upload_cache( struct brw_cache *cache,
/**
- * This doesn't really work with aux data. Use search/upload instead
+ * Returns a BO from the cache for the given data, using the data itself (data_size bytes) as the key.
+ *
+ * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * better to use, as the potentially changing offsets in the data-used-as-key
+ * will result in excessive cache misses.
+ *
+ * If aux data is involved, use search/upload instead.
+
*/
dri_bo *
-brw_cache_data_sz(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data,
- GLuint data_size,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs)
+brw_cache_data(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ GLuint data_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs)
{
dri_bo *bo;
struct brw_cache_item *item;
@@ -306,25 +312,6 @@ brw_cache_data_sz(struct brw_cache *cache,
return bo;
}
-
-/**
- * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
- *
- * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
- * better to use, as the potentially changing offsets in the data-used-as-key
- * will result in excessive cache misses.
- */
-dri_bo *
-brw_cache_data(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs)
-{
- return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
- reloc_bufs, nr_reloc_bufs);
-}
-
enum pool_type {
DW_SURFACE_STATE,
DW_GENERAL_STATE
@@ -335,11 +322,9 @@ static void
brw_init_cache_id(struct brw_cache *cache,
const char *name,
enum brw_cache_id id,
- GLuint key_size,
GLuint aux_size)
{
cache->name[id] = strdup(name);
- cache->key_size[id] = key_size;
cache->aux_size[id] = aux_size;
}
@@ -359,91 +344,76 @@ brw_init_non_surface_cache(struct brw_context *brw)
brw_init_cache_id(cache,
"CC_VP",
BRW_CC_VP,
- sizeof(struct brw_cc_viewport),
0);
brw_init_cache_id(cache,
"CC_UNIT",
BRW_CC_UNIT,
- sizeof(struct brw_cc_unit_state),
0);
brw_init_cache_id(cache,
"WM_PROG",
BRW_WM_PROG,
- sizeof(struct brw_wm_prog_key),
sizeof(struct brw_wm_prog_data));
brw_init_cache_id(cache,
"SAMPLER_DEFAULT_COLOR",
BRW_SAMPLER_DEFAULT_COLOR,
- sizeof(struct brw_sampler_default_color),
0);
brw_init_cache_id(cache,
"SAMPLER",
BRW_SAMPLER,
- 0, /* variable key/data size */
0);
brw_init_cache_id(cache,
"WM_UNIT",
BRW_WM_UNIT,
- sizeof(struct brw_wm_unit_state),
0);
brw_init_cache_id(cache,
"SF_PROG",
BRW_SF_PROG,
- sizeof(struct brw_sf_prog_key),
sizeof(struct brw_sf_prog_data));
brw_init_cache_id(cache,
"SF_VP",
BRW_SF_VP,
- sizeof(struct brw_sf_viewport),
0);
brw_init_cache_id(cache,
"SF_UNIT",
BRW_SF_UNIT,
- sizeof(struct brw_sf_unit_state),
0);
brw_init_cache_id(cache,
"VS_UNIT",
BRW_VS_UNIT,
- sizeof(struct brw_vs_unit_state),
0);
brw_init_cache_id(cache,
"VS_PROG",
BRW_VS_PROG,
- sizeof(struct brw_vs_prog_key),
sizeof(struct brw_vs_prog_data));
brw_init_cache_id(cache,
"CLIP_UNIT",
BRW_CLIP_UNIT,
- sizeof(struct brw_clip_unit_state),
0);
brw_init_cache_id(cache,
"CLIP_PROG",
BRW_CLIP_PROG,
- sizeof(struct brw_clip_prog_key),
sizeof(struct brw_clip_prog_data));
brw_init_cache_id(cache,
"GS_UNIT",
BRW_GS_UNIT,
- sizeof(struct brw_gs_unit_state),
0);
brw_init_cache_id(cache,
"GS_PROG",
BRW_GS_PROG,
- sizeof(struct brw_gs_prog_key),
sizeof(struct brw_gs_prog_data));
}
@@ -463,13 +433,11 @@ brw_init_surface_cache(struct brw_context *brw)
brw_init_cache_id(cache,
"SS_SURFACE",
BRW_SS_SURFACE,
- sizeof(struct brw_surface_state),
0);
brw_init_cache_id(cache,
"SS_SURF_BIND",
BRW_SS_SURF_BIND,
- 0,
0);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index b817b741e7..af8dfb4c15 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -34,6 +34,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
/* This is used to initialize brw->state.atoms[]. We could use this
* list directly except for a single atom, brw_constant_buffer, which
@@ -142,7 +143,7 @@ static void xor_states( struct brw_state_flags *result,
result->cache = a->cache ^ b->cache;
}
-static void
+void
brw_clear_validated_bos(struct brw_context *brw)
{
int i;
@@ -308,7 +309,7 @@ void brw_validate_state( struct brw_context *brw )
if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
brw_clear_batch_cache(brw);
- brw->intel.Fallback = 0;
+ brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
@@ -324,6 +325,8 @@ void brw_validate_state( struct brw_context *brw )
}
}
+ intel_check_front_buffer_rendering(intel);
+
/* Make sure that the textures which are referenced by the current
* brw fragment program are actually present/valid.
* If this fails, we can experience GPU lock-ups.
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 5986cbffad..e59e52ed86 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -86,10 +86,10 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
if (mt->compressed) {
- qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4;
mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
} else {
- qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h);
mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
}
@@ -102,7 +102,8 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
height, 1);
for (q = 0; q < nr_images; q++)
- intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
+ intel_miptree_set_image_offset(mt, level, q,
+ x, y + q * qpitch);
if (mt->compressed)
img_height = MAX2(1, height/4);
diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c
index ce21aa4869..bba9249d1b 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -35,7 +35,7 @@
#include "brw_util.h"
#include "brw_defines.h"
-GLuint brw_count_bits( GLuint val )
+GLuint brw_count_bits(uint64_t val)
{
GLuint i;
for (i = 0; val ; val >>= 1)
diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h
index 33e7cd87e4..04f3175d3e 100644
--- a/src/mesa/drivers/dri/i965/brw_util.h
+++ b/src/mesa/drivers/dri/i965/brw_util.h
@@ -35,7 +35,7 @@
#include "main/mtypes.h"
-extern GLuint brw_count_bits( GLuint val );
+extern GLuint brw_count_bits(uint64_t val);
extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
extern GLuint brw_translate_blend_factor( GLenum factor );
extern GLuint brw_translate_blend_equation( GLenum mode );
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index f0c79efbd9..fd055e225e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -56,7 +56,7 @@ static void do_vs_prog( struct brw_context *brw,
c.prog_data.inputs_read = vp->program.Base.InputsRead;
if (c.key.copy_edgeflag) {
- c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE;
+ c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE);
c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 1638ef8111..00efd3443d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -147,7 +147,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf = 4;
for (i = 0; i < VERT_RESULT_MAX; i++) {
- if (c->prog_data.outputs_written & (1 << i)) {
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
c->nr_outputs++;
assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
if (i == VERT_RESULT_HPOS) {
@@ -331,63 +331,65 @@ static void unalias3( struct brw_vs_compile *c,
}
}
-static void emit_sop( struct brw_compile *p,
+static void emit_sop( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1,
GLuint cond)
{
+ struct brw_compile *p = &c->func;
+
brw_MOV(p, dst, brw_imm_f(0.0f));
brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
brw_MOV(p, dst, brw_imm_f(1.0f));
brw_set_predicate_control_flag_value(p, 0xff);
}
-static void emit_seq( struct brw_compile *p,
+static void emit_seq( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
}
-static void emit_sne( struct brw_compile *p,
+static void emit_sne( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
}
-static void emit_slt( struct brw_compile *p,
+static void emit_slt( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_L);
}
-static void emit_sle( struct brw_compile *p,
+static void emit_sle( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_LE);
}
-static void emit_sgt( struct brw_compile *p,
+static void emit_sgt( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_G);
}
-static void emit_sge( struct brw_compile *p,
+static void emit_sge( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_GE);
}
static void emit_max( struct brw_compile *p,
@@ -912,6 +914,7 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_CONSTANT:
case PROGRAM_UNIFORM:
case PROGRAM_ENV_PARAM:
+ case PROGRAM_LOCAL_PARAM:
if (c->vp->use_const_buffer) {
return get_constant(c, inst, argIndex);
}
@@ -930,7 +933,6 @@ get_src_reg( struct brw_vs_compile *c,
/* this is a normal case since we loop over all three src args */
return brw_null_reg();
- case PROGRAM_LOCAL_PARAM:
case PROGRAM_WRITE_ONLY:
default:
assert(0);
@@ -1122,7 +1124,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
/* Update the header for point size, user clipping flags, and -ve rhw
* workaround.
*/
- if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
+ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
c->key.nr_userclip || BRW_IS_965(p->brw))
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
@@ -1132,7 +1134,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_set_access_mode(p, BRW_ALIGN_16);
- if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) {
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
@@ -1222,7 +1224,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
GLuint i, mrf = 0;
for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
- if (c->prog_data.outputs_written & (1 << i)) {
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
/* move from GRF to MRF */
brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
mrf++;
@@ -1269,6 +1271,38 @@ post_vs_emit( struct brw_vs_compile *c,
}
}
+static GLboolean
+accumulator_contains(struct brw_vs_compile *c, struct brw_reg val)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *prev_insn = &p->store[p->nr_insn - 1];
+
+ if (p->nr_insn == 0)
+ return GL_FALSE;
+
+ if (val.address_mode != BRW_ADDRESS_DIRECT)
+ return GL_FALSE;
+
+ switch (prev_insn->header.opcode) {
+ case BRW_OPCODE_MOV:
+ case BRW_OPCODE_MAC:
+ case BRW_OPCODE_MUL:
+ if (prev_insn->header.access_mode == BRW_ALIGN_16 &&
+ prev_insn->header.execution_size == val.width &&
+ prev_insn->bits1.da1.dest_reg_file == val.file &&
+ prev_insn->bits1.da1.dest_reg_type == val.type &&
+ prev_insn->bits1.da1.dest_address_mode == val.address_mode &&
+ prev_insn->bits1.da1.dest_reg_nr == val.nr &&
+ prev_insn->bits1.da16.dest_subreg_nr == val.subnr / 16 &&
+ prev_insn->bits1.da16.dest_writemask == 0xf)
+ return GL_TRUE;
+ else
+ return GL_FALSE;
+ default:
+ return GL_FALSE;
+ }
+}
+
static uint32_t
get_predicate(const struct prog_instruction *inst)
{
@@ -1447,7 +1481,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
break;
case OPCODE_MAD:
- brw_MOV(p, brw_acc_reg(), args[2]);
+ if (!accumulator_contains(c, args[2]))
+ brw_MOV(p, brw_acc_reg(), args[2]);
brw_MAC(p, dst, args[0], args[1]);
break;
case OPCODE_MAX:
@@ -1473,25 +1508,25 @@ void brw_vs_emit(struct brw_vs_compile *c )
break;
case OPCODE_SEQ:
- emit_seq(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_seq);
break;
case OPCODE_SIN:
emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_SNE:
- emit_sne(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_sne);
break;
case OPCODE_SGE:
- emit_sge(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_sge);
break;
case OPCODE_SGT:
- emit_sgt(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_sgt);
break;
case OPCODE_SLT:
- emit_slt(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_slt);
break;
case OPCODE_SLE:
- emit_sle(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_sle);
break;
case OPCODE_SUB:
brw_ADD(p, dst, args[0], negate(args[1]));
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index d790ab6555..7285466645 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -109,10 +109,39 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
vs.thread3.urb_entry_read_offset = 0;
vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
- vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
- else
- vs.thread4.nr_urb_entries = key->nr_urb_entries;
+ if (BRW_IS_IGDNG(brw)) {
+ switch (key->nr_urb_entries) {
+ case 8:
+ case 12:
+ case 16:
+ case 32:
+ case 64:
+ case 96:
+ case 128:
+ case 168:
+ case 192:
+ case 224:
+ case 256:
+ vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+ break;
+ default:
+ assert(0);
+ }
+ } else {
+ switch (key->nr_urb_entries) {
+ case 8:
+ case 12:
+ case 16:
+ case 32:
+ break;
+ case 64:
+ assert(BRW_IS_G4X(brw));
+ break;
+ default:
+ assert(0);
+ }
+ vs.thread4.nr_urb_entries = key->nr_urb_entries;
+ }
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 89f47522a1..3bc9840a97 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -30,7 +30,6 @@
*/
#include "main/mtypes.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "shader/prog_parameter.h"
@@ -53,6 +52,7 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
const int size = params->NumParameters * 4 * sizeof(GLfloat);
drm_intel_bo *const_buffer;
+ int i;
/* BRW_NEW_VERTEX_PROGRAM */
if (!vp->use_const_buffer)
@@ -62,7 +62,19 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
size, 64);
/* _NEW_PROGRAM_CONSTANTS */
- dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ /* Updates the ParameterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters);
+
+ intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE);
+ for (i = 0; i < params->NumParameters; i++) {
+ memcpy(const_buffer->virtual + i * 4 * sizeof(float),
+ params->ParameterValues[i],
+ 4 * sizeof(float));
+ }
+ intel_bo_unmap_gtt_preferred(intel, const_buffer);
return const_buffer;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 124fde25fe..72749b3859 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -46,7 +46,7 @@
#include "brw_state.h"
#include "brw_fallback.h"
#include "brw_vs.h"
-
+#include "brw_wm.h"
static void
dri_bo_release(dri_bo **bo)
@@ -66,8 +66,14 @@ static void brw_destroy_context( struct intel_context *intel )
brw_destroy_state(brw);
brw_draw_destroy( brw );
-
- _mesa_free(brw->wm.compile_data);
+ brw_clear_validated_bos(brw);
+ if (brw->wm.compile_data) {
+ _mesa_free(brw->wm.compile_data->instruction);
+ _mesa_free(brw->wm.compile_data->vreg);
+ _mesa_free(brw->wm.compile_data->refs);
+ _mesa_free(brw->wm.compile_data->prog_instructions);
+ _mesa_free(brw->wm.compile_data);
+ }
for (i = 0; i < brw->state.nr_color_regions; i++)
intel_region_release(&brw->state.color_regions[i]);
@@ -144,9 +150,6 @@ static void brw_new_batch( struct intel_context *intel )
{
struct brw_context *brw = brw_context(&intel->ctx);
- /* Check that we didn't just wrap our batchbuffer at a bad time. */
- assert(!brw->no_batch_wrap);
-
brw->curbe.need_new_bo = GL_TRUE;
/* Mark all context state as needing to be re-emitted.
@@ -175,20 +178,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence )
brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
}
-/* called from intelWaitForIdle() and intelFlush()
- *
- * For now, just flush everything. Could be smarter later.
- */
-static GLuint brw_flush_cmd( void )
-{
- struct brw_mi_flush flush;
- flush.opcode = CMD_MI_FLUSH;
- flush.pad = 0;
- flush.flags = BRW_FLUSH_STATE_CACHE;
- return *(GLuint *)&flush;
-}
-
-
static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
{
/* nothing */
@@ -209,6 +198,5 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.finish_batch = brw_finish_batch;
brw->intel.vtbl.destroy = brw_destroy_context;
brw->intel.vtbl.set_draw_region = brw_set_draw_region;
- brw->intel.vtbl.flush_cmd = brw_flush_cmd;
brw->intel.vtbl.debug_batch = brw_debug_batch;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 2292de94c4..6895f64410 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -29,7 +29,6 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "main/texformat.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_wm.h"
@@ -153,8 +152,21 @@ static void do_wm_prog( struct brw_context *brw,
*/
return;
}
+ c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction));
+ c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN *
+ sizeof(*c->prog_instructions));
+ c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg));
+ c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs));
} else {
+ void *instruction = c->instruction;
+ void *prog_instructions = c->prog_instructions;
+ void *vreg = c->vreg;
+ void *refs = c->refs;
memset(c, 0, sizeof(*brw->wm.compile_data));
+ c->instruction = instruction;
+ c->prog_instructions = prog_instructions;
+ c->vreg = vreg;
+ c->refs = refs;
}
memcpy(&c->key, key, sizeof(*key));
@@ -218,7 +230,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
ctx->Color.AlphaEnabled)
lookup |= IZ_PS_KILL_ALPHATEST_BIT;
- if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH))
+ if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
/* _NEW_DEPTH */
@@ -288,7 +300,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
if (img->InternalFormat == GL_YCBCR_MESA) {
key->yuvtex_mask |= 1 << i;
- if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
+ if (img->TexFormat == MESA_FORMAT_YCBCR)
key->yuvtex_swap_mask |= 1 << i;
}
@@ -309,6 +321,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
* from the incoming screen origin relative position we get as part of our
* payload.
*
+ * This is only needed for the WM_WPOSXY opcode when the fragment program
+ * uses the gl_FragCoord input.
+ *
* We could avoid recompiling by including this as a constant referenced by
* our program, but if we were to do that it would also be nice to handle
* getting that constant updated at batchbuffer submit time (when we
@@ -317,17 +332,21 @@ static void brw_wm_populate_key( struct brw_context *brw,
* just avoid using this as key data if the program doesn't use
* fragment.position.
*
- * This pretty much becomes moot with DRI2 and redirected buffers anyway,
- * as our origins will always be zero then.
+ * For DRI2 the origin_x/y will always be (0,0) but we still need the
+ * drawable height in order to invert the Y axis.
*/
- if (brw->intel.driDrawable != NULL) {
- key->origin_x = brw->intel.driDrawable->x;
- key->origin_y = brw->intel.driDrawable->y;
- key->drawable_height = brw->intel.driDrawable->h;
+ if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
+ if (brw->intel.driDrawable != NULL) {
+ key->origin_x = brw->intel.driDrawable->x;
+ key->origin_y = brw->intel.driDrawable->y;
+ key->drawable_height = brw->intel.driDrawable->h;
+ }
}
+ key->nr_color_regions = brw->state.nr_color_regions;
+
/* CACHE_NEW_VS_PROG */
- key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS;
+ key->vp_outputs_written = brw->vs.prog_data->outputs_written;
/* The unique fragment program ID */
key->program_string_id = fp->id;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 872b1f3ecf..b9b987ea70 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -67,18 +67,19 @@ struct brw_wm_prog_key {
GLuint flat_shade:1;
GLuint linear_color:1; /**< linear interpolation vs perspective interp */
GLuint runtime_check_aads_emit:1;
+ GLuint nr_color_regions:2;
GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
GLuint shadowtex_mask:16;
GLuint yuvtex_mask:16;
GLuint yuvtex_swap_mask:16; /* UV swaped */
- GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
+ GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
+ GLushort origin_x, origin_y;
+ GLushort drawable_height;
+ GLbitfield64 vp_outputs_written;
GLuint program_string_id:32;
- GLuint origin_x, origin_y;
- GLuint drawable_height;
- GLuint vp_outputs_written;
};
@@ -153,15 +154,16 @@ struct brw_wm_instruction {
};
-#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
+#define BRW_WM_MAX_INSN (MAX_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
#define BRW_WM_MAX_GRF 128 /* hardware limit */
#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12)
#define BRW_WM_MAX_PARAM 256
#define BRW_WM_MAX_CONST 256
-#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
#define BRW_WM_MAX_SUBROUTINE 16
+/* used in masks next to WRITEMASK_*. */
+#define SATURATE (1<<5)
/* New opcodes to track internal operations required for WM unit.
@@ -200,7 +202,7 @@ struct brw_wm_compile {
* simplifying and adding instructions for interpolation and
* framebuffer writes.
*/
- struct prog_instruction prog_instructions[BRW_WM_MAX_INSN];
+ struct prog_instruction *prog_instructions;
GLuint nr_fp_insns;
GLuint fp_temp;
GLuint fp_interp_emitted;
@@ -211,7 +213,7 @@ struct brw_wm_compile {
struct prog_src_register pixel_w;
- struct brw_wm_value vreg[BRW_WM_MAX_VREG];
+ struct brw_wm_value *vreg;
GLuint nr_vreg;
struct brw_wm_value creg[BRW_WM_MAX_PARAM];
@@ -228,10 +230,10 @@ struct brw_wm_compile {
struct brw_wm_ref undef_ref;
struct brw_wm_value undef_value;
- struct brw_wm_ref refs[BRW_WM_MAX_REF];
+ struct brw_wm_ref *refs;
GLuint nr_refs;
- struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
+ struct brw_wm_instruction *instruction;
GLuint nr_insns;
struct brw_wm_constref constref[BRW_WM_MAX_CONST];
@@ -271,6 +273,12 @@ struct brw_wm_compile {
};
+/** Bits for prog_instruction::Aux field */
+#define INST_AUX_EOT 0x1
+#define INST_AUX_TARGET(T) (T << 1)
+#define INST_AUX_GET_TARGET(AUX) ((AUX) >> 1)
+
+
GLuint brw_wm_nr_args( GLuint opcode );
GLuint brw_wm_is_scalar_result( GLuint opcode );
@@ -300,10 +308,141 @@ void brw_wm_lookup_iz( GLuint line_aa,
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+/* brw_wm_emit.c */
+void emit_alu1(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
+void emit_alu2(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_cinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
void emit_ddxy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
GLboolean is_ddx,
const struct brw_reg *arg0);
+void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
+void emit_dp3(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_dp4(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_dph(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_fb_write(struct brw_wm_compile *c,
+ struct brw_reg *arg0,
+ struct brw_reg *arg1,
+ struct brw_reg *arg2,
+ GLuint target,
+ GLuint eot);
+void emit_frontfacing(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask);
+void emit_linterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas);
+void emit_lrp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2);
+void emit_mad(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2);
+void emit_math1(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
+void emit_math2(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_min(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_max(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_pinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w);
+void emit_pixel_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask);
+void emit_pixel_w(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas);
+void emit_sop(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLuint cond,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_tex(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler,
+ GLboolean shadow);
+void emit_txb(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler);
+void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
+void emit_xpd(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index bf80a2942a..5390fd2584 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -44,6 +44,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
return reg;
}
+
/* Payload R0:
*
* R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
@@ -60,42 +61,50 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
* R1.8 -- ?
*/
-
-static void emit_pixel_xy(struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask)
+void emit_pixel_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask)
{
+ struct brw_compile *p = &c->func;
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+ struct brw_reg dst0_uw, dst1_uw;
+ brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ if (c->dispatch_width == 16) {
+ dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
+ dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
+ } else {
+ dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
+ dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
+ }
+
/* Calculate pixel centers by adding 1 or 0 to each of the
* micro-tile coordinates passed in r1.
*/
if (mask & WRITEMASK_X) {
brw_ADD(p,
- vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+ dst0_uw,
stride(suboffset(r1_uw, 4), 2, 4, 0),
brw_imm_v(0x10101010));
}
if (mask & WRITEMASK_Y) {
brw_ADD(p,
- vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+ dst1_uw,
stride(suboffset(r1_uw,5), 2, 4, 0),
brw_imm_v(0x11001100));
}
-
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ brw_pop_insn_state(p);
}
-
-static void emit_delta_xy(struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
@@ -118,10 +127,10 @@ static void emit_delta_xy(struct brw_compile *p,
}
}
-static void emit_wpos_xy(struct brw_wm_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
@@ -146,12 +155,14 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
}
-static void emit_pixel_w( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas)
+void emit_pixel_w(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
{
+ struct brw_compile *p = &c->func;
+
/* Don't need this if all you are doing is interpolating color, for
* instance.
*/
@@ -165,21 +176,29 @@ static void emit_pixel_w( struct brw_compile *p,
brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
/* Calc w */
- brw_math_16( p, dst[3],
- BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
- BRW_MATH_PRECISION_FULL);
+ if (c->dispatch_width == 16) {
+ brw_math_16(p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ } else {
+ brw_math(p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
}
}
-
-static void emit_linterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas )
+void emit_linterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -199,12 +218,12 @@ static void emit_linterp( struct brw_compile *p,
}
-static void emit_pinterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas,
- const struct brw_reg *w)
+void emit_pinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -229,10 +248,10 @@ static void emit_pinterp( struct brw_compile *p,
}
-static void emit_cinterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+void emit_cinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -251,9 +270,9 @@ static void emit_cinterp( struct brw_compile *p,
}
/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
-static void emit_frontfacing( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask )
+void emit_frontfacing(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask)
{
struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -352,13 +371,13 @@ void emit_ddxy(struct brw_compile *p,
brw_set_saturate(p, 0);
}
-static void emit_alu1( struct brw_compile *p,
- struct brw_instruction *(*func)(struct brw_compile *,
- struct brw_reg,
- struct brw_reg),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+void emit_alu1(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
GLuint i;
@@ -376,15 +395,15 @@ static void emit_alu1( struct brw_compile *p,
}
-static void emit_alu2( struct brw_compile *p,
- struct brw_instruction *(*func)(struct brw_compile *,
- struct brw_reg,
- struct brw_reg,
- struct brw_reg),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_alu2(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -402,12 +421,12 @@ static void emit_alu2( struct brw_compile *p,
}
-static void emit_mad( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2 )
+void emit_mad(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
GLuint i;
@@ -422,26 +441,12 @@ static void emit_mad( struct brw_compile *p,
}
}
-static void emit_trunc( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
-{
- GLuint i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- brw_RNDZ(p, dst[i], arg0[i]);
- }
- }
-}
-
-static void emit_lrp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2 )
+void emit_lrp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
GLuint i;
@@ -461,21 +466,24 @@ static void emit_lrp( struct brw_compile *p,
}
}
-static void emit_sop( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- GLuint cond,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_sop(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLuint cond,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_MOV(p, dst[i], brw_imm_f(0));
+ brw_push_insn_state(p);
brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst[i], brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
brw_MOV(p, dst[i], brw_imm_f(1.0));
- brw_set_predicate_control_flag_value(p, 0xff);
+ brw_pop_insn_state(p);
}
}
}
@@ -559,11 +567,11 @@ static void emit_cmp( struct brw_compile *p,
}
}
-static void emit_max( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_max(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -583,11 +591,11 @@ static void emit_max( struct brw_compile *p,
}
}
-static void emit_min( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_min(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -608,11 +616,11 @@ static void emit_min( struct brw_compile *p,
}
-static void emit_dp3( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_dp3(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
@@ -630,11 +638,11 @@ static void emit_dp3( struct brw_compile *p,
}
-static void emit_dp4( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_dp4(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
@@ -653,11 +661,11 @@ static void emit_dp4( struct brw_compile *p,
}
-static void emit_dph( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_dph(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
@@ -676,11 +684,11 @@ static void emit_dph( struct brw_compile *p,
}
-static void emit_xpd( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_xpd(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -701,41 +709,68 @@ static void emit_xpd( struct brw_compile *p,
}
-static void emit_math1( struct brw_compile *p,
- GLuint function,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+void emit_math1(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
+ struct brw_compile *p = &c->func;
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+ GLuint saturate = ((mask & SATURATE) ?
+ BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE);
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
assert(is_power_of_two(mask & WRITEMASK_XYZW));
+ /* If compressed, this will write message reg 2,3 from arg0.x's 16
+ * channels.
+ */
brw_MOV(p, brw_message_reg(2), arg0[0]);
/* Send two messages to perform all 16 operations:
*/
- brw_math_16(p,
- dst[dst_chan],
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p,
+ dst[dst_chan],
+ function,
+ saturate,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
function,
- (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 2,
+ saturate,
+ 3,
brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+ }
+ brw_pop_insn_state(p);
}
-static void emit_math2( struct brw_compile *p,
- GLuint function,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1)
+void emit_math2(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
+ struct brw_compile *p = &c->func;
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+ GLuint saturate = ((mask & SATURATE) ?
+ BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE);
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
@@ -746,173 +781,231 @@ static void emit_math2( struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, brw_message_reg(2), arg0[0]);
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+ }
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, brw_message_reg(3), arg1[0]);
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+ }
-
- /* Send two messages to perform all 16 operations:
- */
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
dst[dst_chan],
function,
- (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ saturate,
2,
brw_null_reg(),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p,
- offset(dst[dst_chan],1),
- function,
- (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 4,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-
+ /* Send two messages to perform all 16 operations:
+ */
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
+ function,
+ saturate,
+ 4,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
brw_pop_insn_state(p);
}
-
-static void emit_tex( struct brw_wm_compile *c,
- const struct brw_wm_instruction *inst,
- struct brw_reg *dst,
- GLuint dst_flags,
- struct brw_reg *arg )
+void emit_tex(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler,
+ GLboolean shadow)
{
struct brw_compile *p = &c->func;
- GLuint msgLength, responseLength;
- GLuint i, nr;
+ struct brw_reg dst_retyped;
+ GLuint cur_mrf = 2, response_length;
+ GLuint i, nr_texcoords;
GLuint emit;
GLuint msg_type;
+ GLuint mrf_per_channel;
+ GLuint simd_mode;
+
+ if (c->dispatch_width == 16) {
+ mrf_per_channel = 2;
+ response_length = 8;
+ dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ } else {
+ mrf_per_channel = 1;
+ response_length = 4;
+ dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+ }
/* How many input regs are there?
*/
- switch (inst->tex_idx) {
+ switch (tex_idx) {
case TEXTURE_1D_INDEX:
emit = WRITEMASK_X;
- nr = 1;
+ nr_texcoords = 1;
break;
case TEXTURE_2D_INDEX:
case TEXTURE_RECT_INDEX:
emit = WRITEMASK_XY;
- nr = 2;
+ nr_texcoords = 2;
break;
case TEXTURE_3D_INDEX:
case TEXTURE_CUBE_INDEX:
emit = WRITEMASK_XYZ;
- nr = 3;
+ nr_texcoords = 3;
break;
default:
/* unexpected target */
abort();
}
- if (inst->tex_shadow) {
- nr = 4;
- emit |= WRITEMASK_W;
- }
+ /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
+ if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8)
+ nr_texcoords = 3;
- msgLength = 1;
+ /* For shadow comparisons, we have to supply u,v,r. */
+ if (shadow)
+ nr_texcoords = 3;
- for (i = 0; i < nr; i++) {
- static const GLuint swz[4] = {0,1,2,2};
- if (emit & (1<<i))
- brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
+ /* Emit the texcoords. */
+ for (i = 0; i < nr_texcoords; i++) {
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
else
- brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
- msgLength += 2;
+ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+ cur_mrf += mrf_per_channel;
}
- responseLength = 8; /* always */
+ /* Fill in the shadow comparison reference value. */
+ if (shadow) {
+ if (BRW_IS_IGDNG(p->brw)) {
+ /* Fill in the cube map array index value. */
+ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+ cur_mrf += mrf_per_channel;
+ } else if (c->dispatch_width == 8) {
+ /* Fill in the LOD bias value. */
+ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+ cur_mrf += mrf_per_channel;
+ }
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+ cur_mrf += mrf_per_channel;
+ }
if (BRW_IS_IGDNG(p->brw)) {
- if (inst->tex_shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
} else {
- if (inst->tex_shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+ /* Note that G45 and older determines shadow compare and dispatch width
+ * from message length for most messages.
+ */
+ if (c->dispatch_width == 16 && shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
}
- brw_SAMPLE(p,
- retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ brw_SAMPLE(p,
+ dst_retyped,
1,
- retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- SURF_INDEX_TEXTURE(inst->tex_unit),
- inst->tex_unit, /* sampler */
- inst->writemask,
- msg_type,
- responseLength,
- msgLength,
- 0,
+ retype(depth_payload, BRW_REGISTER_TYPE_UW),
+ SURF_INDEX_TEXTURE(sampler),
+ sampler,
+ dst_flags & WRITEMASK_XYZW,
+ msg_type,
+ response_length,
+ cur_mrf - 1,
+ 0,
1,
- BRW_SAMPLER_SIMD_MODE_SIMD16);
+ simd_mode);
}
-static void emit_txb( struct brw_wm_compile *c,
- const struct brw_wm_instruction *inst,
- struct brw_reg *dst,
- GLuint dst_flags,
- struct brw_reg *arg )
+void emit_txb(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler)
{
struct brw_compile *p = &c->func;
GLuint msgLength;
GLuint msg_type;
- /* Shadow ignored for txb.
+ GLuint mrf_per_channel;
+ GLuint response_length;
+ struct brw_reg dst_retyped;
+
+ /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
+ * samples, so we'll use the 16-wide instruction, leave the second halves
+ * undefined, and trust the execution mask to keep the undefined pixels
+ * from mattering.
*/
- switch (inst->tex_idx) {
+ if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) {
+ if (BRW_IS_IGDNG(p->brw))
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ mrf_per_channel = 2;
+ dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
+ response_length = 8;
+ } else {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
+ mrf_per_channel = 1;
+ dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
+ response_length = 4;
+ }
+
+ /* Shadow ignored for txb. */
+ switch (tex_idx) {
case TEXTURE_1D_INDEX:
- brw_MOV(p, brw_message_reg(2), arg[0]);
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
+ brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
break;
case TEXTURE_2D_INDEX:
case TEXTURE_RECT_INDEX:
- brw_MOV(p, brw_message_reg(2), arg[0]);
- brw_MOV(p, brw_message_reg(4), arg[1]);
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
+ brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
+ brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
break;
case TEXTURE_3D_INDEX:
case TEXTURE_CUBE_INDEX:
- brw_MOV(p, brw_message_reg(2), arg[0]);
- brw_MOV(p, brw_message_reg(4), arg[1]);
- brw_MOV(p, brw_message_reg(6), arg[2]);
+ brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
+ brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
+ brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
break;
default:
/* unexpected target */
abort();
}
- brw_MOV(p, brw_message_reg(8), arg[3]);
- msgLength = 9;
-
- if (BRW_IS_IGDNG(p->brw))
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
+ msgLength = 2 + 4 * mrf_per_channel - 1;
brw_SAMPLE(p,
- retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ dst_retyped,
1,
- retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- SURF_INDEX_TEXTURE(inst->tex_unit),
- inst->tex_unit, /* sampler */
- inst->writemask,
+ retype(depth_payload, BRW_REGISTER_TYPE_UW),
+ SURF_INDEX_TEXTURE(sampler),
+ sampler,
+ dst_flags & WRITEMASK_XYZW,
msg_type,
- 8, /* responseLength */
+ response_length,
msgLength,
0,
1,
@@ -920,11 +1013,13 @@ static void emit_txb( struct brw_wm_compile *c,
}
-static void emit_lit( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+static void emit_lit(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
+ struct brw_compile *p = &c->func;
+
assert((mask & WRITEMASK_XW) == 0);
if (mask & WRITEMASK_Y) {
@@ -934,7 +1029,7 @@ static void emit_lit( struct brw_compile *p,
}
if (mask & WRITEMASK_Z) {
- emit_math2(p, BRW_MATH_FUNCTION_POW,
+ emit_math2(c, BRW_MATH_FUNCTION_POW,
&dst[2],
WRITEMASK_X | (mask & SATURATE),
&arg0[1],
@@ -1001,7 +1096,13 @@ static void fire_fb_write( struct brw_wm_compile *c,
GLuint eot )
{
struct brw_compile *p = &c->func;
-
+ struct brw_reg dst;
+
+ if (c->dispatch_width == 16)
+ dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ else
+ dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+
/* Pass through control information:
*/
/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
@@ -1018,7 +1119,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
/* Send framebuffer write message: */
/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
brw_fb_WRITE(p,
- retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ dst,
base_reg,
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
target,
@@ -1050,14 +1151,15 @@ static void emit_aa( struct brw_wm_compile *c,
* \param arg1 the pass-through depth value
* \param arg2 the shader-computed depth value
*/
-static void emit_fb_write( struct brw_wm_compile *c,
- struct brw_reg *arg0,
- struct brw_reg *arg1,
- struct brw_reg *arg2,
- GLuint target,
- GLuint eot)
+void emit_fb_write(struct brw_wm_compile *c,
+ struct brw_reg *arg0,
+ struct brw_reg *arg1,
+ struct brw_reg *arg2,
+ GLuint target,
+ GLuint eot)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
GLuint nr = 2;
GLuint channel;
@@ -1069,30 +1171,37 @@ static void emit_fb_write( struct brw_wm_compile *c,
/* I don't really understand how this achieves the color interleave
* (ie RGBARGBA) in the result: [Do the saturation here]
*/
- {
- brw_push_insn_state(p);
-
- for (channel = 0; channel < 4; channel++) {
+ brw_push_insn_state(p);
+
+ for (channel = 0; channel < 4; channel++) {
+ if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) {
+ /* By setting the high bit of the MRF register number, we indicate
+ * that we want COMPR4 mode - instead of doing the usual destination
+ * + 1 for the second half we get destination + 4.
+ */
+ brw_MOV(p,
+ brw_message_reg(nr + channel + (1 << 7)),
+ arg0[channel]);
+ } else {
/* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
/* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
-
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p,
brw_message_reg(nr + channel),
arg0[channel]);
-
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p,
- brw_message_reg(nr + channel + 4),
- sechalf(arg0[channel]));
- }
- /* skip over the regs populated above:
- */
- nr += 8;
-
- brw_pop_insn_state(p);
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p,
+ brw_message_reg(nr + channel + 4),
+ sechalf(arg0[channel]));
+ }
+ }
}
+ /* skip over the regs populated above:
+ */
+ nr += 8;
+ brw_pop_insn_state(p);
if (c->key.source_depth_to_render_target)
{
@@ -1142,7 +1251,7 @@ static void emit_fb_write( struct brw_wm_compile *c,
get_element_ud(brw_vec8_grf(1,0), 6),
brw_imm_ud(1<<26));
- jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
{
emit_aa(c, arg1, 2);
fire_fb_write(c, 0, nr, target, eot);
@@ -1156,7 +1265,6 @@ static void emit_fb_write( struct brw_wm_compile *c,
}
}
-
/**
* Move a GPR to scratch memory.
*/
@@ -1294,7 +1402,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
/* Generated instructions for calculating triangle interpolants:
*/
case WM_PIXELXY:
- emit_pixel_xy(p, dst, dst_flags);
+ emit_pixel_xy(c, dst, dst_flags);
break;
case WM_DELTAXY:
@@ -1306,7 +1414,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case WM_PIXELW:
- emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+ emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
break;
case WM_LINTERP:
@@ -1364,7 +1472,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case OPCODE_TRUNC:
- emit_trunc(p, dst, dst_flags, args[0]);
+ emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
break;
case OPCODE_LRP:
@@ -1391,27 +1499,27 @@ void brw_wm_emit( struct brw_wm_compile *c )
/* Higher math functions:
*/
case OPCODE_RCP:
- emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
break;
case OPCODE_RSQ:
- emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
break;
case OPCODE_SIN:
- emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
break;
case OPCODE_COS:
- emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
break;
case OPCODE_EX2:
- emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
break;
case OPCODE_LG2:
- emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
break;
case OPCODE_SCS:
@@ -1419,13 +1527,13 @@ void brw_wm_emit( struct brw_wm_compile *c )
* fixup for 16-element execution.
*/
if (dst_flags & WRITEMASK_X)
- emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
if (dst_flags & WRITEMASK_Y)
- emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
break;
case OPCODE_POW:
- emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+ emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
break;
/* Comparisons:
@@ -1463,17 +1571,20 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case OPCODE_LIT:
- emit_lit(p, dst, dst_flags, args[0]);
+ emit_lit(c, dst, dst_flags, args[0]);
break;
/* Texturing operations:
*/
case OPCODE_TEX:
- emit_tex(c, inst, dst, dst_flags, args[0]);
+ emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
+ inst->tex_idx, inst->tex_unit,
+ inst->tex_shadow);
break;
case OPCODE_TXB:
- emit_txb(c, inst, dst, dst_flags, args[0]);
+ emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
+ inst->tex_idx, inst->tex_unit);
break;
case OPCODE_KIL:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 4e3edfbbff..3737faf26f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -138,7 +138,6 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
reg.CondMask = COND_TR;
reg.CondSwizzle = 0;
reg.CondSrc = 0;
- reg.pad = 0;
return reg;
}
@@ -181,6 +180,9 @@ static void release_temp( struct brw_wm_compile *c, struct prog_dst_register tem
static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
{
+ assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
+ memset(&c->prog_instructions[c->nr_fp_insns], 0,
+ sizeof(*c->prog_instructions));
return &c->prog_instructions[c->nr_fp_insns++];
}
@@ -447,7 +449,6 @@ static void emit_interp( struct brw_wm_compile *c,
break;
case FRAG_ATTRIB_FACE:
- /* XXX review/test this case */
emit_op(c,
WM_FRONTFACING,
dst_mask(dst, WRITEMASK_X),
@@ -956,7 +957,7 @@ static void precalc_txp( struct brw_wm_compile *c,
-static void emit_fb_write( struct brw_wm_compile *c )
+static void emit_render_target_writes( struct brw_wm_compile *c )
{
struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
@@ -964,36 +965,34 @@ static void emit_fb_write( struct brw_wm_compile *c )
GLuint i;
struct prog_instruction *inst, *last_inst;
- struct brw_context *brw = c->func.brw;
/* The inst->Aux field is used for FB write target and the EOT marker */
- if (brw->state.nr_color_regions > 1) {
- for (i = 0 ; i < brw->state.nr_color_regions; i++) {
+ if (c->key.nr_color_regions > 1) {
+ for (i = 0 ; i < c->key.nr_color_regions; i++) {
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
- last_inst = inst = emit_op(c,
- WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
- outcolor, payload_r0_depth, outdepth);
- inst->Aux = (i<<1);
+ last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
+ 0, outcolor, payload_r0_depth, outdepth);
+ inst->Aux = INST_AUX_TARGET(i);
if (c->fp_fragcolor_emitted) {
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
- last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+ last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
0, outcolor, payload_r0_depth, outdepth);
- inst->Aux = (i<<1);
+ inst->Aux = INST_AUX_TARGET(i);
}
}
- last_inst->Aux |= 1; //eot
+ last_inst->Aux |= INST_AUX_EOT;
}
else {
/* if gl_FragData[0] is written, use it, else use gl_FragColor */
- if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
+ if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0))
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
else
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
0, outcolor, payload_r0_depth, outdepth);
- inst->Aux = 1|(0<<1);
+ inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0);
}
}
@@ -1155,7 +1154,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
out->DstReg.WriteMask = 0;
break;
case OPCODE_END:
- emit_fb_write(c);
+ emit_render_target_writes(c);
break;
case OPCODE_PRINT:
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index c9fe1dd8ad..e8c2cb66ec 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -371,7 +371,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
for (j = 0; j < 4; j++)
set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
}
- if (c->key.vp_outputs_written & (1 << i)) {
+ if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
reg_index += 2;
}
}
@@ -550,42 +550,6 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
}
}
-
-/**
- * Same as \sa get_src_reg() but if the register is a literal, emit
- * a brw_reg encoding the literal.
- * Note that a brw instruction only allows one src operand to be a literal.
- * For instructions with more than one operand, only the second can be a
- * literal. This means that we treat some literals as constants/uniforms
- * (which why PROGRAM_CONSTANT is checked in fetch_constants()).
- *
- */
-static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint srcRegIndex, GLuint channel)
-{
- const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
- if (src->File == PROGRAM_CONSTANT) {
- /* a literal */
- const int component = GET_SWZ(src->Swizzle, channel);
- const GLfloat *param =
- c->fp->program.Base.Parameters->ParameterValues[src->Index];
- GLfloat value = param[component];
- if (src->Negate & (1 << channel))
- value = -value;
- if (src->Abs)
- value = FABSF(value);
-#if 0
- printf(" form immed value %f for chan %d\n", value, channel);
-#endif
- return brw_imm_f(value);
- }
- else {
- return get_src_reg(c, inst, srcRegIndex, channel);
- }
-}
-
-
/**
* Subroutines are minimal support for resusable instruction sequences.
* They are implemented as simply as possible to minimise overhead: there
@@ -650,542 +614,110 @@ static void invoke_subroutine( struct brw_wm_compile *c,
}
}
-static void emit_trunc( struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i);
- src = get_src_reg(c, inst, 0, i);
- brw_RNDZ(p, dst, src);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_mov( struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i);
- /* XXX some moves from immediate value don't work reliably!!! */
- /*src = get_src_reg_imm(c, inst, 0, i);*/
- src = get_src_reg(c, inst, 0, i);
- brw_MOV(p, dst, src);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_pixel_xy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg r1 = brw_vec1_grf(1, 0);
- struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
-
- struct brw_reg dst0, dst1;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- dst0 = get_dst_reg(c, inst, 0);
- dst1 = get_dst_reg(c, inst, 1);
- /* Calculate pixel centers by adding 1 or 0 to each of the
- * micro-tile coordinates passed in r1.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
- stride(suboffset(r1_uw, 4), 2, 4, 0),
- brw_imm_v(0x10101010));
- }
-
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
- stride(suboffset(r1_uw, 5), 2, 4, 0),
- brw_imm_v(0x11001100));
- }
-}
-
-static void emit_delta_xy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg r1 = brw_vec1_grf(1, 0);
- struct brw_reg dst0, dst1, src0, src1;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- dst0 = get_dst_reg(c, inst, 0);
- dst1 = get_dst_reg(c, inst, 1);
- src0 = get_src_reg(c, inst, 0, 0);
- src1 = get_src_reg(c, inst, 0, 1);
- /* Calc delta X,Y by subtracting origin in r1 from the pixel
- * centers.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- dst0,
- retype(src0, BRW_REGISTER_TYPE_UW),
- negate(r1));
- }
-
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- dst1,
- retype(src1, BRW_REGISTER_TYPE_UW),
- negate(suboffset(r1,1)));
-
- }
-}
-
-static void fire_fb_write( struct brw_wm_compile *c,
- GLuint base_reg,
- GLuint nr,
- GLuint target,
- GLuint eot)
-{
- struct brw_compile *p = &c->func;
- /* Pass through control information:
- */
- /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
- {
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
- brw_MOV(p,
- brw_message_reg(base_reg + 1),
- brw_vec8_grf(1, 0));
- brw_pop_insn_state(p);
- }
- /* Send framebuffer write message: */
- brw_fb_WRITE(p,
- retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
- base_reg,
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
- target,
- nr,
- 0,
- eot);
-}
-
-static void emit_fb_write(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- int nr = 2;
- int channel;
- GLuint target, eot;
- struct brw_reg src0;
-
- /* Reserve a space for AA - may not be needed:
- */
- if (c->key.aa_dest_stencil_reg)
- nr += 1;
-
- brw_push_insn_state(p);
- for (channel = 0; channel < 4; channel++) {
- src0 = get_src_reg(c, inst, 0, channel);
- /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
- /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
- brw_MOV(p, brw_message_reg(nr + channel), src0);
- }
- /* skip over the regs populated above: */
- nr += 8;
- brw_pop_insn_state(p);
-
- if (c->key.source_depth_to_render_target) {
- if (c->key.computes_depth) {
- src0 = get_src_reg(c, inst, 2, 2);
- brw_MOV(p, brw_message_reg(nr), src0);
- }
- else {
- src0 = get_src_reg(c, inst, 1, 1);
- brw_MOV(p, brw_message_reg(nr), src0);
- }
-
- nr += 2;
- }
-
- if (c->key.dest_depth_reg) {
- const GLuint comp = c->key.dest_depth_reg / 2;
- const GLuint off = c->key.dest_depth_reg % 2;
-
- if (off != 0) {
- /* XXX this code needs review/testing */
- struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
- struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
-
- brw_push_insn_state(p);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-
- brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
- /* 2nd half? */
- brw_MOV(p, brw_message_reg(nr+1), arg1_1);
- brw_pop_insn_state(p);
- }
- else
- {
- struct brw_reg src = get_src_reg(c, inst, 1, 1);
- brw_MOV(p, brw_message_reg(nr), src);
- }
- nr += 2;
- }
-
- target = inst->Aux >> 1;
- eot = inst->Aux & 1;
- fire_fb_write(c, 0, nr, target, eot);
-}
-
-static void emit_pixel_w( struct brw_wm_compile *c,
- const struct prog_instruction *inst)
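+/* Workaround for using brw_wm_emit.c's emit functions, which expect
+ * destination regs to be uniquely written. For every destination channel
+ * enabled in the mask, any source channel whose register file and number
+ * match it is first copied to a freshly allocated temporary; func is then
+ * called with the substituted arguments and the temporaries are released.
+ * This is needed for instructions which use their destination as
+ * a temporary.
+ */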
+static void
+unalias3(struct brw_wm_compile *c,
+ void (*func)(struct brw_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- if (mask & WRITEMASK_W) {
- struct brw_reg dst, src0, delta0, delta1;
- struct brw_reg interp3;
-
- dst = get_dst_reg(c, inst, 3);
- src0 = get_src_reg(c, inst, 0, 0);
- delta0 = get_src_reg(c, inst, 1, 0);
- delta1 = get_src_reg(c, inst, 1, 1);
-
- interp3 = brw_vec1_grf(src0.nr+1, 4);
- /* Calc 1/w - just linterp wpos[3] optimized by putting the
- * result straight into a message reg.
- */
- brw_LINE(p, brw_null_reg(), interp3, delta0);
- brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
-
- /* Calc w */
- brw_math_16( p, dst,
- BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
- BRW_MATH_PRECISION_FULL);
- }
-}
+ struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4];
+ int i, j;
+ int mark = mark_tmps(c);
-static void emit_linterp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst, delta0, delta1;
- struct brw_reg src0;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- delta0 = get_src_reg(c, inst, 1, 0);
- delta1 = get_src_reg(c, inst, 1, 1);
- nr = src0.nr;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_LINE(p, brw_null_reg(), interp[i], delta0);
- brw_MAC(p, dst, suboffset(interp[i],1), delta1);
- }
+ for (j = 0; j < 4; j++) {
+ tmp_arg0[j] = arg0[j];
+ tmp_arg1[j] = arg1[j];
+ tmp_arg2[j] = arg2[j];
}
-}
-
-static void emit_cinterp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst, src0;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- nr = src0.nr;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
+ for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, suboffset(interp[i],3));
+ for (j = 0; j < 4; j++) {
+ if (arg0[j].file == dst[i].file &&
+ dst[i].nr == arg0[j].nr) {
+ tmp_arg0[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg0[j], arg0[j]);
+ }
+ if (arg1[j].file == dst[i].file &&
+ dst[i].nr == arg1[j].nr) {
+ tmp_arg1[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg1[j], arg1[j]);
+ }
+ if (arg2[j].file == dst[i].file &&
+ dst[i].nr == arg2[j].nr) {
+ tmp_arg2[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg2[j], arg2[j]);
+ }
+ }
}
}
-}
-
-static void emit_pinterp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- struct brw_reg interp[4];
- struct brw_reg dst, delta0, delta1;
- struct brw_reg src0, w;
- GLuint nr, i;
- src0 = get_src_reg(c, inst, 0, 0);
- delta0 = get_src_reg(c, inst, 1, 0);
- delta1 = get_src_reg(c, inst, 1, 1);
- w = get_src_reg(c, inst, 2, 3);
- nr = src0.nr;
+ func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2);
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_LINE(p, brw_null_reg(), interp[i], delta0);
- brw_MAC(p, dst, suboffset(interp[i],1),
- delta1);
- brw_MUL(p, dst, dst, w);
- }
- }
+ release_tmps(c, mark);
}
-/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
-static void emit_frontfacing(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
+/* Two-argument variant of unalias3(): workaround for using brw_wm_emit.c's
+ * emit functions, which expect destination regs to be uniquely written.
+ * Moves arguments out to temporaries as necessary for instructions which
+ * use their destination as a temporary.
+ */
+static void
+unalias2(struct brw_wm_compile *c,
+ void (*func)(struct brw_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
struct brw_compile *p = &c->func;
- struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
- struct brw_reg dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
+ struct brw_reg tmp_arg0[4], tmp_arg1[4];
+ int i, j;
+ int mark = mark_tmps(c);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, brw_imm_f(0.0));
- }
+ for (j = 0; j < 4; j++) {
+ tmp_arg0[j] = arg0[j];
+ tmp_arg1[j] = arg1[j];
}
- /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
- * us front face
- */
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, brw_imm_f(1.0));
+ for (j = 0; j < 4; j++) {
+ if (arg0[j].file == dst[i].file &&
+ dst[i].nr == arg0[j].nr) {
+ tmp_arg0[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg0[j], arg0[j]);
+ }
+ if (arg1[j].file == dst[i].file &&
+ dst[i].nr == arg1[j].nr) {
+ tmp_arg1[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg1[j], arg1[j]);
+ }
+ }
}
}
- brw_set_predicate_control_flag_value(p, 0xff);
-}
-static void emit_xpd(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- for (i = 0; i < 4; i++) {
- GLuint i2 = (i+2)%3;
- GLuint i1 = (i+1)%3;
- if (mask & (1<<i)) {
- struct brw_reg src0, src1, dst;
- dst = get_dst_reg(c, inst, i);
- src0 = negate(get_src_reg(c, inst, 0, i2));
- src1 = get_src_reg_imm(c, inst, 1, i1);
- brw_MUL(p, brw_null_reg(), src0, src1);
- src0 = get_src_reg(c, inst, 0, i1);
- src1 = get_src_reg_imm(c, inst, 1, i2);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- brw_MAC(p, dst, src0, src1);
- brw_set_saturate(p, 0);
- }
- }
- brw_set_saturate(p, 0);
-}
+ func(p, dst, mask, tmp_arg0, tmp_arg1);
-static void emit_dp3(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg src0[3], src1[3], dst;
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- for (i = 0; i < 3; i++) {
- src0[i] = get_src_reg(c, inst, 0, i);
- src1[i] = get_src_reg_imm(c, inst, 1, i);
- }
-
- dst = get_dst_reg(c, inst, dst_chan);
- brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
- brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MAC(p, dst, src0[2], src1[2]);
- brw_set_saturate(p, 0);
-}
-
-static void emit_dp4(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg src0[4], src1[4], dst;
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, inst, 0, i);
- src1[i] = get_src_reg_imm(c, inst, 1, i);
- }
- dst = get_dst_reg(c, inst, dst_chan);
- brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
- brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
- brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MAC(p, dst, src0[3], src1[3]);
- brw_set_saturate(p, 0);
-}
-
-static void emit_dph(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg src0[4], src1[4], dst;
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, inst, 0, i);
- src1[i] = get_src_reg_imm(c, inst, 1, i);
- }
- dst = get_dst_reg(c, inst, dst_chan);
- brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
- brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
- brw_MAC(p, dst, src0[2], src1[2]);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_ADD(p, dst, dst, src1[3]);
- brw_set_saturate(p, 0);
-}
-
-/**
- * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
- * Note that the result of the function is smeared across the dest
- * register's X, Y, Z and W channels (subject to writemasking of course).
- */
-static void emit_math1(struct brw_wm_compile *c,
- const struct prog_instruction *inst, GLuint func)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- /* Get first component of source register */
- dst = get_dst_reg(c, inst, dst_chan);
- src0 = get_src_reg(c, inst, 0, 0);
-
- brw_MOV(p, brw_message_reg(2), src0);
- brw_math(p,
- dst,
- func,
- (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 2,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-}
-
-static void emit_rcp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
-}
-
-static void emit_rsq(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
-}
-
-static void emit_sin(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
-}
-
-static void emit_cos(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
-}
-
-static void emit_ex2(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
-}
-
-static void emit_lg2(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
-}
-
-static void emit_add(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- brw_ADD(p, dst, src0, src1);
- }
- }
- brw_set_saturate(p, 0);
+ release_tmps(c, mark);
}
static void emit_arl(struct brw_wm_compile *c,
@@ -1201,180 +733,6 @@ static void emit_arl(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
-
-static void emit_mul(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- brw_MUL(p, dst, src0, src1);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_frc(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg_imm(c, inst, 0, i);
- brw_FRC(p, dst, src0);
- }
- }
- if (inst->SaturateMode != SATURATE_OFF)
- brw_set_saturate(p, 0);
-}
-
-static void emit_flr(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg_imm(c, inst, 0, i);
- brw_RNDD(p, dst, src0);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-
-static void emit_min_max(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- const GLuint mask = inst->DstReg.WriteMask;
- const int mark = mark_tmps(c);
- int i;
- brw_push_insn_state(p);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- struct brw_reg real_dst = get_dst_reg(c, inst, i);
- struct brw_reg src0 = get_src_reg(c, inst, 0, i);
- struct brw_reg src1 = get_src_reg(c, inst, 1, i);
- struct brw_reg dst;
- /* if dst==src0 or dst==src1 we need to use a temp reg */
- GLboolean use_temp = brw_same_reg(dst, src0) ||
- brw_same_reg(dst, src1);
- if (use_temp)
- dst = alloc_tmp(c);
- else
- dst = real_dst;
-
- /*
- printf(" Min/max: dst %d src0 %d src1 %d\n",
- dst.nr, src0.nr, src1.nr);
- */
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MOV(p, dst, src0);
- brw_set_saturate(p, 0);
-
- if (inst->Opcode == OPCODE_MIN)
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
- else
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0);
-
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst, src1);
- brw_set_saturate(p, 0);
- brw_set_predicate_control_flag_value(p, 0xff);
- if (use_temp)
- brw_MOV(p, real_dst, dst);
- }
- }
- brw_pop_insn_state(p);
- release_tmps(c, mark);
-}
-
-static void emit_pow(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst, src0, src1;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- dst = get_dst_reg(c, inst, dst_chan);
- src0 = get_src_reg_imm(c, inst, 0, 0);
- src1 = get_src_reg_imm(c, inst, 1, 0);
-
- brw_MOV(p, brw_message_reg(2), src0);
- brw_MOV(p, brw_message_reg(3), src1);
-
- brw_math(p,
- dst,
- BRW_MATH_FUNCTION_POW,
- (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 2,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-}
-
-static void emit_lrp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
- int i;
- int mark = mark_tmps(c);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
-
- src1 = get_src_reg_imm(c, inst, 1, i);
-
- if (src1.nr == dst.nr) {
- tmp1 = alloc_tmp(c);
- brw_MOV(p, tmp1, src1);
- } else
- tmp1 = src1;
-
- src2 = get_src_reg(c, inst, 2, i);
- if (src2.nr == dst.nr) {
- tmp2 = alloc_tmp(c);
- brw_MOV(p, tmp2, src2);
- } else
- tmp2 = src2;
-
- brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
- brw_MUL(p, brw_null_reg(), dst, tmp2);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MAC(p, dst, src0, tmp1);
- brw_set_saturate(p, 0);
- }
- release_tmps(c, mark);
- }
-}
-
/**
* For GLSL shaders, this KIL will be unconditional.
* It may be contained inside an IF/ENDIF structure of course.
@@ -1390,89 +748,6 @@ static void emit_kil(struct brw_wm_compile *c)
brw_pop_insn_state(p);
}
-static void emit_mad(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg dst, src0, src1, src2;
- int i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- src2 = get_src_reg_imm(c, inst, 2, i);
- brw_MUL(p, dst, src0, src1);
-
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_ADD(p, dst, dst, src2);
- brw_set_saturate(p, 0);
- }
- }
-}
-
-static void emit_sop(struct brw_wm_compile *c,
- const struct prog_instruction *inst, GLuint cond)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg dst, src0, src1;
- int i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- brw_push_insn_state(p);
- brw_CMP(p, brw_null_reg(), cond, src0, src1);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_MOV(p, dst, brw_imm_f(0.0));
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst, brw_imm_f(1.0));
- brw_pop_insn_state(p);
- }
- }
-}
-
-static void emit_slt(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_L);
-}
-
-static void emit_sle(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_LE);
-}
-
-static void emit_sgt(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_G);
-}
-
-static void emit_sge(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_GE);
-}
-
-static void emit_seq(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_EQ);
-}
-
-static void emit_sne(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
-}
-
static INLINE struct brw_reg high_words( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
@@ -2525,196 +1800,6 @@ static void emit_noise4( struct brw_wm_compile *c,
release_tmps( c, mark );
}
-
-static void emit_wpos_xy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg src0[2], dst[2];
-
- dst[0] = get_dst_reg(c, inst, 0);
- dst[1] = get_dst_reg(c, inst, 1);
-
- src0[0] = get_src_reg(c, inst, 0, 0);
- src0[1] = get_src_reg(c, inst, 0, 1);
-
- /* Calculate the pixel offset from window bottom left into destination
- * X and Y channels.
- */
- if (mask & WRITEMASK_X) {
- /* X' = X - origin_x */
- brw_ADD(p,
- dst[0],
- retype(src0[0], BRW_REGISTER_TYPE_W),
- brw_imm_d(0 - c->key.origin_x));
- }
-
- if (mask & WRITEMASK_Y) {
- /* Y' = height - (Y - origin_y) = height + origin_y - Y */
- brw_ADD(p,
- dst[1],
- negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
- brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
- }
-}
-
-/* TODO
- BIAS on SIMD8 not working yet...
- */
-static void emit_txb(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst[4], src[4], payload_reg;
- /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
- const GLuint unit = inst->TexSrcUnit;
- GLuint i;
- GLuint msg_type;
-
- assert(unit < BRW_MAX_TEX_UNIT);
-
- payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
-
- for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i);
- for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, inst, 0, i);
-
- switch (inst->TexSrcTarget) {
- case TEXTURE_1D_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */
- brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */
- break;
- case TEXTURE_2D_INDEX:
- case TEXTURE_RECT_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), src[1]);
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
- break;
- case TEXTURE_3D_INDEX:
- case TEXTURE_CUBE_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), src[1]);
- brw_MOV(p, brw_message_reg(4), src[2]);
- break;
- default:
- /* invalid target */
- abort();
- }
- brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
-
- if (BRW_IS_IGDNG(p->brw)) {
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
- } else {
- /* Does it work well on SIMD8? */
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
- }
-
- brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
- 1, /* msg_reg_nr */
- retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
- SURF_INDEX_TEXTURE(unit),
- unit, /* sampler */
- inst->DstReg.WriteMask, /* writemask */
- msg_type, /* msg_type */
- 4, /* response_length */
- 4, /* msg_length */
- 0, /* eot */
- 1,
- BRW_SAMPLER_SIMD_MODE_SIMD8);
-}
-
-
-static void emit_tex(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst[4], src[4], payload_reg;
- /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
- const GLuint unit = inst->TexSrcUnit;
- GLuint msg_len;
- GLuint i, nr;
- GLuint emit;
- GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
- GLuint msg_type;
-
- assert(unit < BRW_MAX_TEX_UNIT);
-
- payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
-
- for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i);
- for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, inst, 0, i);
-
- switch (inst->TexSrcTarget) {
- case TEXTURE_1D_INDEX:
- emit = WRITEMASK_X;
- nr = 1;
- break;
- case TEXTURE_2D_INDEX:
- case TEXTURE_RECT_INDEX:
- emit = WRITEMASK_XY;
- nr = 2;
- break;
- case TEXTURE_3D_INDEX:
- case TEXTURE_CUBE_INDEX:
- emit = WRITEMASK_XYZ;
- nr = 3;
- break;
- default:
- /* invalid target */
- abort();
- }
- msg_len = 1;
-
- /* move/load S, T, R coords */
- for (i = 0; i < nr; i++) {
- static const GLuint swz[4] = {0,1,2,2};
- if (emit & (1<<i))
- brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
- else
- brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
- msg_len += 1;
- }
-
- if (shadow) {
- brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
- brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
- }
-
- if (BRW_IS_IGDNG(p->brw)) {
- if (shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
- } else {
- /* Does it work for shadow on SIMD8 ? */
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
- }
-
- brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
- 1, /* msg_reg_nr */
- retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
- SURF_INDEX_TEXTURE(unit),
- unit, /* sampler */
- inst->DstReg.WriteMask, /* writemask */
- msg_type, /* msg_type */
- 4, /* response_length */
- shadow ? 6 : 4, /* msg_length */
- 0, /* eot */
- 1,
- BRW_SAMPLER_SIMD_MODE_SIMD8);
-
- if (shadow)
- brw_MOV(p, dst[3], brw_imm_f(1.0));
-}
-
/**
* Resolve subroutine calls after code emit is done.
@@ -2771,6 +1856,21 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
if (c->fp->use_const_buffer)
fetch_constants(c, inst);
+ if (inst->Opcode != OPCODE_ARL) {
+ for (j = 0; j < 4; j++) {
+ if (inst->DstReg.WriteMask & (1 << j))
+ dst[j] = get_dst_reg(c, inst, j);
+ else
+ dst[j] = brw_null_reg();
+ }
+ }
+ for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
+ get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW);
+
+ dst_flags = inst->DstReg.WriteMask;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ dst_flags |= SATURATE;
+
if (inst->CondUpdate)
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
else
@@ -2782,126 +1882,131 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
switch (inst->Opcode) {
case WM_PIXELXY:
- emit_pixel_xy(c, inst);
+ emit_pixel_xy(c, dst, dst_flags);
break;
case WM_DELTAXY:
- emit_delta_xy(c, inst);
+ emit_delta_xy(p, dst, dst_flags, args[0]);
break;
case WM_PIXELW:
- emit_pixel_w(c, inst);
+ emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
break;
case WM_LINTERP:
- emit_linterp(c, inst);
+ emit_linterp(p, dst, dst_flags, args[0], args[1]);
break;
case WM_PINTERP:
- emit_pinterp(c, inst);
+ emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
case WM_CINTERP:
- emit_cinterp(c, inst);
+ emit_cinterp(p, dst, dst_flags, args[0]);
break;
case WM_WPOSXY:
- emit_wpos_xy(c, inst);
+ emit_wpos_xy(c, dst, dst_flags, args[0]);
break;
case WM_FB_WRITE:
- emit_fb_write(c, inst);
+ emit_fb_write(c, args[0], args[1], args[2],
+ INST_AUX_GET_TARGET(inst->Aux),
+ inst->Aux & INST_AUX_EOT);
break;
case WM_FRONTFACING:
- emit_frontfacing(c, inst);
+ emit_frontfacing(p, dst, dst_flags);
break;
case OPCODE_ADD:
- emit_add(c, inst);
+ emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_ARL:
emit_arl(c, inst);
break;
case OPCODE_FRC:
- emit_frc(c, inst);
+ emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
break;
case OPCODE_FLR:
- emit_flr(c, inst);
+ emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
case OPCODE_LRP:
- emit_lrp(c, inst);
+ unalias3(c, emit_lrp,
+ dst, dst_flags, args[0], args[1], args[2]);
break;
case OPCODE_TRUNC:
- emit_trunc(c, inst);
+ emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
break;
case OPCODE_MOV:
case OPCODE_SWZ:
- emit_mov(c, inst);
+ emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
break;
case OPCODE_DP3:
- emit_dp3(c, inst);
+ emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DP4:
- emit_dp4(c, inst);
+ emit_dp4(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_XPD:
- emit_xpd(c, inst);
+ emit_xpd(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DPH:
- emit_dph(c, inst);
+ emit_dph(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_RCP:
- emit_rcp(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
break;
case OPCODE_RSQ:
- emit_rsq(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
break;
case OPCODE_SIN:
- emit_sin(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
break;
case OPCODE_COS:
- emit_cos(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
break;
case OPCODE_EX2:
- emit_ex2(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
break;
case OPCODE_LG2:
- emit_lg2(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
break;
case OPCODE_MIN:
+ unalias2(c, emit_min, dst, dst_flags, args[0], args[1]);
+ break;
case OPCODE_MAX:
- emit_min_max(c, inst);
+ unalias2(c, emit_max, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DDX:
case OPCODE_DDY:
- for (j = 0; j < 4; j++) {
- if (inst->DstReg.WriteMask & (1 << j))
- dst[j] = get_dst_reg(c, inst, j);
- else
- dst[j] = brw_null_reg();
- }
- get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
args[0]);
break;
case OPCODE_SLT:
- emit_slt(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_L, args[0], args[1]);
break;
case OPCODE_SLE:
- emit_sle(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_LE, args[0], args[1]);
break;
case OPCODE_SGT:
- emit_sgt(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_G, args[0], args[1]);
break;
case OPCODE_SGE:
- emit_sge(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_GE, args[0], args[1]);
break;
case OPCODE_SEQ:
- emit_seq(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_EQ, args[0], args[1]);
break;
case OPCODE_SNE:
- emit_sne(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_NEQ, args[0], args[1]);
break;
case OPCODE_MUL:
- emit_mul(c, inst);
+ emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_POW:
- emit_pow(c, inst);
+ emit_math2(c, BRW_MATH_FUNCTION_POW,
+ dst, dst_flags, args[0], args[1]);
break;
case OPCODE_MAD:
- emit_mad(c, inst);
+ emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
break;
case OPCODE_NOISE1:
emit_noise1(c, inst);
@@ -2916,10 +2021,19 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_noise4(c, inst);
break;
case OPCODE_TEX:
- emit_tex(c, inst);
+ emit_tex(c, dst, dst_flags, args[0],
+ get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
+ 0, 1, 0, 0),
+ inst->TexSrcTarget,
+ inst->TexSrcUnit,
+ (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0);
break;
case OPCODE_TXB:
- emit_txb(c, inst);
+ emit_txb(c, dst, dst_flags, args[0],
+ get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
+ 0, 1, 0, 0),
+ inst->TexSrcTarget,
+ c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]);
break;
case OPCODE_KIL_NV:
emit_kil(c);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 6279258339..ff4c082d5e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -42,12 +42,14 @@
/* Hand out the next unused slot of c->refs.
 * The slot is zero-filled before it is returned and c->nr_refs is advanced
 * by one; the assert checks that nr_refs is still below BRW_WM_MAX_REF.
 */
static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
{
assert(c->nr_refs < BRW_WM_MAX_REF);
+ memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs));
return &c->refs[c->nr_refs++];
}
static struct brw_wm_value *get_value( struct brw_wm_compile *c)
{
- assert(c->nr_refs < BRW_WM_MAX_VREG);
+ /* Hand out the next unused slot of c->vreg, zero-filled, and advance
+ * c->nr_vreg.  The bounds check must use nr_vreg, the counter that
+ * indexes c->vreg; it previously tested nr_refs, which indexes c->refs.
+ */
+ assert(c->nr_vreg < BRW_WM_MAX_VREG);
+ memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg));
return &c->vreg[c->nr_vreg++];
}
@@ -55,6 +57,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c)
/* Hand out the next unused slot of c->instruction.
 * The slot is zero-filled before it is returned and c->nr_insns is advanced
 * by one; the assert checks that nr_insns is still below BRW_WM_MAX_INSN.
 */
static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
{
assert(c->nr_insns < BRW_WM_MAX_INSN);
+ memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction));
return &c->instruction[c->nr_insns++];
}
@@ -322,8 +325,8 @@ translate_insn(struct brw_wm_compile *c,
out->tex_unit = inst->TexSrcUnit;
out->tex_idx = inst->TexSrcTarget;
out->tex_shadow = inst->TexShadow;
- out->eot = inst->Aux & 1;
- out->target = inst->Aux >> 1;
+ out->eot = inst->Aux & INST_AUX_EOT;
+ out->target = INST_AUX_GET_TARGET(inst->Aux);
/* Args:
*/
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
index 6faea018fb..31303febf0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
@@ -82,8 +82,8 @@ static void init_registers( struct brw_wm_compile *c )
for (j = 0; j < c->nr_creg; j++)
prealloc_reg(c, &c->creg[j], i++);
- for (j = 0; j < FRAG_ATTRIB_MAX; j++) {
- if (c->key.vp_outputs_written & (1<<j)) {
+ for (j = 0; j < VERT_RESULT_MAX; j++) {
+ if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) {
int fp_index;
if (j >= VERT_RESULT_VAR0)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index dff466587a..aa2e519588 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -66,19 +66,6 @@ static GLuint translate_wrap_mode( GLenum wrap )
}
}
-
-static GLuint U_FIXED(GLfloat value, GLuint frac_bits)
-{
- value *= (1<<frac_bits);
- return value < 0 ? 0 : value;
-}
-
-static GLint S_FIXED(GLfloat value, GLuint frac_bits)
-{
- return value * (1<<frac_bits);
-}
-
-
static dri_bo *upload_default_color( struct brw_context *brw,
const GLfloat *color )
{
@@ -86,8 +73,8 @@ static dri_bo *upload_default_color( struct brw_context *brw,
COPY_4V(sdc.color, color);
- return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
- NULL, 0 );
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc), NULL, 0);
}
@@ -228,8 +215,8 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
*/
sampler->ss0.base_level = U_FIXED(0, 1);
- sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6);
- sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6);
+ sampler->ss1.max_lod = U_FIXED(CLAMP(key->maxlod, 0, 13), 6);
+ sampler->ss1.min_lod = U_FIXED(CLAMP(key->minlod, 0, 13), 6);
sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 361f91292b..f89ed9bce7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -106,7 +106,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* as far as we can tell */
key->computes_depth =
- (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
+ (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
/* BRW_NEW_DEPTH_BUFFER
* Override for NULL depthbuffer case, required by the Pixel Shader Computed
* Depth field.
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index f7cc5153a8..47035cc6fc 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -31,7 +31,6 @@
#include "main/mtypes.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "shader/prog_parameter.h"
@@ -70,7 +69,8 @@ static GLuint translate_tex_target( GLenum target )
}
-static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
+static GLuint translate_tex_format( gl_format mesa_format,
+ GLenum internal_format,
GLenum depth_mode )
{
switch( mesa_format ) {
@@ -86,6 +86,9 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
case MESA_FORMAT_AL88:
return BRW_SURFACEFORMAT_L8A8_UNORM;
+ case MESA_FORMAT_AL1616:
+ return BRW_SURFACEFORMAT_L16A16_UNORM;
+
case MESA_FORMAT_RGB888:
assert(0); /* not supported for sampling */
return BRW_SURFACEFORMAT_R8G8B8_UNORM;
@@ -96,7 +99,11 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
else
return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case MESA_FORMAT_XRGB8888:
+ return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+
case MESA_FORMAT_RGBA8888_REV:
+ _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()");
if (internal_format == GL_RGB)
return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
else
@@ -287,7 +294,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.bo = NULL;
key.offset = intelObj->textureOffset;
} else {
- key.format = firstImage->TexFormat->MesaFormat;
+ key.format = firstImage->TexFormat;
key.internal_format = firstImage->InternalFormat;
key.pitch = intelObj->mt->pitch;
key.depth = firstImage->Depth;
@@ -354,7 +361,10 @@ brw_create_constant_surface( struct brw_context *brw,
NULL, NULL);
if (key->bo) {
- /* Emit relocation to surface contents */
+ /* Emit relocation to surface contents. Section 5.1.1 of the gen4
+ * bspec ("Data Cache") says that the data cache does not exist as
+ * a separate cache and is just the sampler cache.
+ */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_SAMPLER, 0,
0,
@@ -527,10 +537,13 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
region_bo = region->buffer;
key.surface_type = BRW_SURFACE_2D;
- switch (irb->texformat->MesaFormat) {
+ switch (irb->texformat) {
case MESA_FORMAT_ARGB8888:
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
+ case MESA_FORMAT_XRGB8888:
+ key.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+ break;
case MESA_FORMAT_RGB565:
key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
break;
@@ -541,8 +554,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
break;
default:
- _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
- irb->texformat->MesaFormat);
+ _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->texformat);
}
key.tiling = region->tiling;
if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
@@ -564,6 +576,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
key.cpp = 4;
key.draw_offset = 0;
}
+ /* _NEW_COLOR */
memcpy(key.color_mask, ctx->Color.ColorMask,
sizeof(key.color_mask));
key.color_blend = (!ctx->Color._LogicOpEnabled &&
@@ -696,7 +709,7 @@ static void prepare_wm_surfaces(struct brw_context *brw )
GLuint i;
int old_nr_surfaces;
- /* _NEW_BUFFERS */
+ /* _NEW_BUFFERS | _NEW_COLOR */
/* Update surfaces for drawing buffers */
if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
@@ -709,7 +722,7 @@ static void prepare_wm_surfaces(struct brw_context *brw )
}
old_nr_surfaces = brw->wm.nr_surfaces;
- brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
+ brw->wm.nr_surfaces = BRW_MAX_DRAW_BUFFERS;
if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;