summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h | 22
-rw-r--r-- src/mesa/drivers/dri/i965/brw_curbe.c | 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw.c | 55
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw_upload.c | 20
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h | 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_misc_state.c | 86
-rw-r--r-- src/mesa/drivers/dri/i965/brw_queryobj.c | 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state.h | 19
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_batch.c | 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_upload.c | 47
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vtbl.c | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.h | 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 412
15 files changed, 589 insertions, 108 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 474158b484..e2bc08a6cb 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -39,6 +39,7 @@
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
+#include "brw_state.h"
#include "brw_vs.h"
#include "intel_tex.h"
#include "intel_blit.h"
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1c6a0dede0..e3904be977 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -433,7 +433,6 @@ struct brw_context
GLuint primitive;
GLboolean emit_state_always;
- GLboolean wrap;
GLboolean tmp_fallback;
GLboolean no_batch_wrap;
@@ -445,6 +444,19 @@ struct brw_context
GLuint nr_draw_regions;
struct intel_region *draw_regions[MAX_DRAW_BUFFERS];
struct intel_region *depth_region;
+
+ /**
+ * List of buffers accumulated in brw_validate_state to receive
+ * dri_bo_check_aperture treatment before exec, so we can know if we
+ * should flush the batch and try again before emitting primitives.
+ *
+ * This can be a fixed number as we only have a limited number of
+ * objects referenced from the batchbuffer in a primitive emit,
+ * consisting of the vertex buffers, pipelined state pointers,
+ * the CURBE, the depth buffer, and a query BO.
+ */
+ dri_bo *validated_bos[VERT_ATTRIB_MAX + 16];
+ int validated_bo_count;
} state;
struct brw_state_pointers attribs;
@@ -680,14 +692,6 @@ void brw_emit_query_begin(struct brw_context *brw);
void brw_emit_query_end(struct brw_context *brw);
/*======================================================================
- * brw_state.c
- */
-void brw_validate_state( struct brw_context *brw );
-void brw_init_state( struct brw_context *brw );
-void brw_destroy_state( struct brw_context *brw );
-
-
-/*======================================================================
* brw_state_dump.c
*/
void brw_debug_batch(struct intel_context *intel);
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 6ffa221d66..c7bac7b0c5 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -307,6 +307,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
}
+ brw_add_validated_bo(brw, brw->curbe.curbe_bo);
/* Because this provokes an action (ie copy the constants into the
* URB), it shouldn't be shortcircuited if identical to the
@@ -328,15 +329,6 @@ static void emit_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLuint sz = brw->curbe.total_size;
- dri_bo *aper_array[] = {
- brw->intel.batch->buf,
- brw->curbe.curbe_bo,
- };
-
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) {
- intel_batchbuffer_flush(intel->batch);
- return;
- }
BEGIN_BATCH(2, IGNORE_CLIPRECTS);
if (sz == 0) {
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 6c71b4abcf..d87b8f8a84 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -256,6 +256,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
struct intel_context *intel = intel_context(ctx);
struct brw_context *brw = brw_context(ctx);
GLboolean retval = GL_FALSE;
+ GLboolean warn = GL_FALSE;
GLuint i;
if (ctx->NewState)
@@ -282,30 +283,25 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
LOCK_HARDWARE(intel);
- if (brw->intel.numClipRects == 0) {
+ if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
UNLOCK_HARDWARE(intel);
return GL_TRUE;
}
+ /* Flush the batch if it's approaching full, so that we don't wrap while
+ * we've got validated state that needs to be in the same batch as the
+ * primitives. This fraction is just a guess (minimal full state plus
+ * a primitive is around 512 bytes), and would be better if we had
+ * an upper bound of how much we might emit in a single
+ * brw_try_draw_prims().
+ */
+ intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
+ LOOP_CLIPRECTS);
{
- /* Flush the batch if it's approaching full, so that we don't wrap while
- * we've got validated state that needs to be in the same batch as the
- * primitives. This fraction is just a guess (minimal full state plus
- * a primitive is around 512 bytes), and would be better if we had
- * an upper bound of how much we might emit in a single
- * brw_try_draw_prims().
- */
- if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4
- /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */
- || intel->batch->cliprect_mode != LOOP_CLIPRECTS)
- intel_batchbuffer_flush(intel->batch);
-
/* Set the first primitive early, ahead of validate_state:
*/
brw_set_prim(brw, prim[0].mode);
- /* XXX: Need to separate validate and upload of state.
- */
brw_validate_state( brw );
/* Various fallback checks:
@@ -316,6 +312,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
if (check_fallbacks( brw, prim, nr_prims ))
goto out;
+ /* Check that we can fit our state in with our existing batchbuffer, or
+ * flush otherwise.
+ */
+ if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+ brw->state.validated_bo_count)) {
+ static GLboolean warned;
+ intel_batchbuffer_flush(intel->batch);
+
+ /* Validate the state after we flushed the batch (which would have
+ * changed the set of dirty state). If we still fail to
+ * check_aperture, warn of what's happening, but attempt to continue
+ * on since it may succeed anyway, and the user would probably rather
+ * see a failure and a warning than a fallback.
+ */
+ brw_validate_state(brw);
+ if (!warned &&
+ dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+ brw->state.validated_bo_count)) {
+ warn = GL_TRUE;
+ warned = GL_TRUE;
+ }
+ }
+
+ brw_upload_state(brw);
+
for (i = 0; i < nr_prims; i++) {
brw_emit_prim(brw, &prim[i]);
}
@@ -326,6 +347,10 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
out:
UNLOCK_HARDWARE(intel);
+ if (warn)
+ fprintf(stderr, "i965: Single primitive emit potentially exceeded "
+ "available aperture space\n");
+
if (!retval)
DBG("%s failed\n", __FUNCTION__);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 7b88b5eaa1..4080c5e322 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -250,10 +250,10 @@ static void get_space( struct brw_context *brw,
wrap_buffers(brw, size);
}
+ assert(*bo_return == NULL);
dri_bo_reference(brw->vb.upload.bo);
*bo_return = brw->vb.upload.bo;
*offset_return = brw->vb.upload.offset;
-
brw->vb.upload.offset += size;
}
@@ -359,6 +359,14 @@ static void brw_prepare_vertices(struct brw_context *brw)
input->offset = (unsigned long)input->glarray->Ptr;
input->stride = input->glarray->StrideB;
} else {
+ if (input->bo != NULL) {
+ /* Already-uploaded vertex data is present from a previous
+ * prepare_vertices, but we had to re-validate state due to
+ * check_aperture failing and a new batch being produced.
+ */
+ continue;
+ }
+
/* Queue the buffer object up to be uploaded in the next pass,
* when we've decided if we're doing interleaved or not.
*/
@@ -417,6 +425,12 @@ static void brw_prepare_vertices(struct brw_context *brw)
}
brw_prepare_query_begin(brw);
+
+ for (i = 0; i < nr_enabled; i++) {
+ struct brw_vertex_element *input = enabled[i];
+
+ brw_add_validated_bo(brw, input->bo);
+ }
}
static void brw_emit_vertices(struct brw_context *brw)
@@ -512,7 +526,7 @@ static void brw_prepare_indices(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
GLuint ib_size;
- dri_bo *bo;
+ dri_bo *bo = NULL;
struct gl_buffer_object *bufferobj;
GLuint offset;
@@ -561,6 +575,8 @@ static void brw_prepare_indices(struct brw_context *brw)
dri_bo_unreference(brw->ib.bo);
brw->ib.bo = bo;
brw->ib.offset = offset;
+
+ brw_add_validated_bo(brw, brw->ib.bo);
}
static void brw_emit_indices(struct brw_context *brw)
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 207b8b7ca3..8cbe4215fb 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -65,7 +65,7 @@ struct brw_reg
GLuint abs:1; /* source only */
GLuint vstride:4; /* source only */
GLuint width:3; /* src only, align1 only */
- GLuint hstride:2; /* src only, align1 only */
+ GLuint hstride:2; /* align1 only */
GLuint address_mode:1; /* relative addressing, hopefully! */
GLuint pad0:1;
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 0bfbec9d14..58d97465d1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -64,7 +64,9 @@ static void brw_set_dest( struct brw_instruction *insn,
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.da1.dest_subreg_nr = dest.subnr;
- insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.da1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
@@ -78,7 +80,9 @@ static void brw_set_dest( struct brw_instruction *insn,
*/
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
- insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.ia1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index afa8694ebb..5bba8c84ec 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -71,6 +71,38 @@ const struct brw_tracked_state brw_blend_constant_color = {
.emit = upload_blend_constant_color
};
+/* Constant single cliprect for framebuffer object or DRI2 drawing.
+ *
+ * Emits a four-dword _3DSTATE_DRAWRECT_INFO_I965 packet sized from the
+ * current draw buffer's Width and Height.  Nothing is emitted unless
+ * intel->constant_cliprect is set.
+ */
+static void upload_drawing_rect(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+
+ if (!intel->constant_cliprect)
+ return;
+
+ BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+ OUT_BATCH(0); /* xmin, ymin */
+ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | /* low 16 bits: Width - 1; presumably xmax */
+ ((ctx->DrawBuffer->Height - 1) << 16)); /* upper bits: Height - 1; presumably ymax */
+ OUT_BATCH(0); /* NOTE(review): presumably the rectangle origin -- confirm against the hardware docs */
+ ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_drawing_rect = { /* state atom wrapping upload_drawing_rect() */
+ .dirty = {
+ .mesa = _NEW_BUFFERS, /* the only dirty flag this atom is keyed on */
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_drawing_rect /* emit-only: no .prepare hook is set */
+};
+
+/**
+ * Prepare hook for the binding table pointers atom.
+ *
+ * Registers the WM binding table buffer (brw->wm.bind_bo) on the
+ * validated-BO list via brw_add_validated_bo().
+ */
+static void prepare_binding_table_pointers(struct brw_context *brw)
+{
+   dri_bo *bind_table = brw->wm.bind_bo;
+
+   brw_add_validated_bo(brw, bind_table);
+}
+
/**
* Upload the binding table pointers, which point each stage's array of surface
* state pointers.
@@ -81,15 +113,6 @@ const struct brw_tracked_state brw_blend_constant_color = {
static void upload_binding_table_pointers(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- dri_bo *aper_array[] = {
- intel->batch->buf,
- brw->wm.bind_bo,
- };
-
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) {
- intel_batchbuffer_flush(intel->batch);
- return;
- }
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
@@ -109,6 +132,7 @@ const struct brw_tracked_state brw_binding_table_pointers = {
.brw = BRW_NEW_BATCH,
.cache = CACHE_NEW_SURF_BIND,
},
+ .prepare = prepare_binding_table_pointers,
.emit = upload_binding_table_pointers,
};
@@ -142,23 +166,18 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_PSP;
}
-static void upload_psp_urb_cbs(struct brw_context *brw )
+
+static void prepare_psp_urb_cbs(struct brw_context *brw)
{
- struct intel_context *intel = &brw->intel;
- dri_bo *aper_array[] = {
- intel->batch->buf,
- brw->vs.state_bo,
- brw->gs.state_bo,
- brw->clip.state_bo,
- brw->wm.state_bo,
- brw->cc.state_bo,
- };
-
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) {
- intel_batchbuffer_flush(intel->batch);
- return;
- }
+ brw_add_validated_bo(brw, brw->vs.state_bo);
+ brw_add_validated_bo(brw, brw->gs.state_bo);
+ brw_add_validated_bo(brw, brw->clip.state_bo);
+ brw_add_validated_bo(brw, brw->wm.state_bo);
+ brw_add_validated_bo(brw, brw->cc.state_bo);
+}
+static void upload_psp_urb_cbs(struct brw_context *brw )
+{
upload_pipelined_state_pointers(brw);
brw_upload_urb_fence(brw);
brw_upload_constant_buffer_state(brw);
@@ -176,9 +195,18 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
CACHE_NEW_WM_UNIT |
CACHE_NEW_CC_UNIT)
},
+ .prepare = prepare_psp_urb_cbs,
.emit = upload_psp_urb_cbs,
};
+/**
+ * Prepare hook for the depth buffer atom.
+ *
+ * When a depth region is bound (brw->state.depth_region is non-NULL), its
+ * backing buffer is registered on the validated-BO list.  With no depth
+ * region there is nothing to register.
+ */
+static void prepare_depthbuffer(struct brw_context *brw)
+{
+   struct intel_region *depth = brw->state.depth_region;
+
+   if (depth == NULL)
+      return;
+
+   brw_add_validated_bo(brw, depth->buffer);
+}
+
static void emit_depthbuffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
@@ -200,10 +228,6 @@ static void emit_depthbuffer(struct brw_context *brw)
ADVANCE_BATCH();
} else {
unsigned int format;
- dri_bo *aper_array[] = {
- intel->batch->buf,
- region->buffer
- };
switch (region->cpp) {
case 2:
@@ -220,11 +244,6 @@ static void emit_depthbuffer(struct brw_context *brw)
return;
}
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) {
- intel_batchbuffer_flush(intel->batch);
- return;
- }
-
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
@@ -253,6 +272,7 @@ const struct brw_tracked_state brw_depthbuffer = {
.brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
.cache = 0,
},
+ .prepare = prepare_depthbuffer,
.emit = emit_depthbuffer,
};
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index a1a1353dee..cb9169e2ee 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -42,6 +42,7 @@
#include "main/imports.h"
#include "brw_context.h"
+#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "intel_reg.h"
@@ -163,10 +164,6 @@ void
brw_prepare_query_begin(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- dri_bo *aper_array[] = {
- intel->batch->buf,
- brw->query.bo,
- };
/* Skip if we're not doing any queries. */
if (is_empty_list(&brw->query.active_head))
@@ -182,8 +179,7 @@ brw_prepare_query_begin(struct brw_context *brw)
brw->query.index = 0;
}
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
- intel_batchbuffer_flush(intel->batch);
+ brw_add_validated_bo(brw, brw->query.bo);
}
/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 4c04036ef0..bb22c03eeb 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -35,6 +35,16 @@
#include "brw_context.h"
+/**
+ * Appends a buffer object to brw->state.validated_bos and takes a
+ * reference on it; the reference is dropped again when the list is
+ * cleared by brw_clear_validated_bos().
+ *
+ * \param brw  context owning the validated-BO list
+ * \param bo   buffer to record; NULL is accepted and ignored
+ *
+ * The list is a fixed-size array.  Overflow trips the assert in debug
+ * builds; when asserts are compiled out the buffer is left off the list
+ * (and no reference is taken) instead of writing past the end of the
+ * array.
+ */
+static inline void
+brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
+{
+   /* Nothing would be stored, so a full list is not an error here. */
+   if (bo == NULL)
+      return;
+
+   assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
+   if (brw->state.validated_bo_count >= ARRAY_SIZE(brw->state.validated_bos))
+      return;
+
+   dri_bo_reference(bo);
+   brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
+}
const struct brw_tracked_state brw_blend_constant_color;
const struct brw_tracked_state brw_cc_unit;
@@ -79,10 +89,19 @@ const struct brw_tracked_state brw_pipe_control;
const struct brw_tracked_state brw_clear_surface_cache;
const struct brw_tracked_state brw_clear_batch_cache;
+const struct brw_tracked_state brw_drawing_rect;
const struct brw_tracked_state brw_indices;
const struct brw_tracked_state brw_vertices;
/***********************************************************************
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+/***********************************************************************
* brw_state_cache.c
*/
dri_bo *brw_cache_data(struct brw_cache *cache,
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index 94ef924868..dc87859f3f 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw )
{
clear_batch_cache(brw);
- brw->wrap = 0;
-
/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
brw->state.dirty.mesa |= ~0;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index b6a52843a8..16b0496f47 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -99,6 +99,7 @@ const struct brw_tracked_state *atoms[] =
&brw_psp_urb_cbs,
#endif
+ &brw_drawing_rect,
&brw_indices,
&brw_vertices,
@@ -168,6 +169,18 @@ static void xor_states( struct brw_state_flags *result,
result->cache = a->cache ^ b->cache;
}
+/**
+ * Empties the validated-BO list: every recorded buffer is unreferenced
+ * and its slot reset to NULL, then the count is set back to zero.
+ */
+static void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+   int n = 0;
+
+   /* Walk the entries recorded by the last validation round, in order. */
+   while (n < brw->state.validated_bo_count) {
+      dri_bo **slot = &brw->state.validated_bos[n];
+
+      dri_bo_unreference(*slot);
+      *slot = NULL;
+      n++;
+   }
+
+   brw->state.validated_bo_count = 0;
+}
/***********************************************************************
* Emit all state:
@@ -176,14 +189,14 @@ void brw_validate_state( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
struct brw_state_flags *state = &brw->state.dirty;
- GLuint i, count, pass = 0;
- dri_bo *last_batch_bo = NULL;
+ GLuint i;
+
+ brw_clear_validated_bos(brw);
state->mesa |= brw->intel.NewGLState;
brw->intel.NewGLState = 0;
- if (brw->wrap)
- state->brw |= BRW_NEW_CONTEXT;
+ brw_add_validated_bo(brw, intel->batch->buf);
if (brw->emit_state_always) {
state->mesa |= ~0;
@@ -210,8 +223,6 @@ void brw_validate_state( struct brw_context *brw )
brw->intel.Fallback = 0;
- count = 0;
-
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = brw->state.atoms[i];
@@ -225,19 +236,15 @@ void brw_validate_state( struct brw_context *brw )
}
}
}
+}
- if (brw->intel.Fallback)
- return;
- /* We're about to try to set up a coherent state in the batchbuffer for
- * the emission of primitives. If we exceed the aperture size in any of the
- * emit() calls, we need to go back to square 1 and try setting up again.
- */
-got_flushed:
- dri_bo_unreference(last_batch_bo);
- last_batch_bo = intel->batch->buf;
- dri_bo_reference(last_batch_bo);
- assert(pass++ <= 2);
+void brw_upload_state(struct brw_context *brw)
+{
+ struct brw_state_flags *state = &brw->state.dirty;
+ int i;
+
+ brw_clear_validated_bos(brw);
if (INTEL_DEBUG) {
/* Debug version which enforces various sanity checks on the
@@ -262,8 +269,6 @@ got_flushed:
if (check_state(state, &atom->dirty)) {
if (atom->emit) {
atom->emit( brw );
- if (intel->batch->buf != last_batch_bo)
- goto got_flushed;
}
}
@@ -288,15 +293,11 @@ got_flushed:
if (check_state(state, &atom->dirty)) {
if (atom->emit) {
atom->emit( brw );
- if (intel->batch->buf != last_batch_bo)
- goto got_flushed;
}
}
}
}
- dri_bo_unreference(last_batch_bo);
-
if (!brw->intel.Fallback)
memset(state, 0, sizeof(*state));
}
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index a64e437860..2d4c81274e 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -62,7 +62,6 @@ dri_bo_release(dri_bo **bo)
*/
static void brw_destroy_context( struct intel_context *intel )
{
- GLcontext *ctx = &intel->ctx;
struct brw_context *brw = brw_context(&intel->ctx);
int i;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 297617ee2d..896390c17b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -157,6 +157,7 @@ struct brw_wm_instruction {
#define BRW_WM_MAX_PARAM 256
#define BRW_WM_MAX_CONST 256
#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+#define BRW_WM_MAX_SUBROUTINE 16
@@ -246,7 +247,10 @@ struct brw_wm_compile {
struct brw_reg stack;
struct brw_reg emit_mask_reg;
GLuint reg_index;
+ GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
+ GLuint tmp_max;
+ GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
};
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 4d5e11f4b6..0ea8c3d50e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -4,6 +4,10 @@
#include "brw_eu.h"
#include "brw_wm.h"
+enum _subroutine { /* identifies a shared subroutine; used to index c->subroutines[] in invoke_subroutine() */
+ SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
+};
+
/* Only guess, need a flag in gl_fragment_program later */
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
@@ -19,6 +23,10 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
case OPCODE_RET:
case OPCODE_DDX:
case OPCODE_DDY:
+ case OPCODE_NOISE1:
+ case OPCODE_NOISE2:
+ case OPCODE_NOISE3:
+ case OPCODE_NOISE4:
case OPCODE_BGNLOOP:
return GL_TRUE;
default:
@@ -47,13 +55,26 @@ static int get_scalar_dst_index(struct prog_instruction *inst)
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
struct brw_reg reg;
- reg = brw_vec8_grf(c->tmp_index--, 0);
+ if(c->tmp_index == c->tmp_max)
+ c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
+
+ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
return reg;
}
-static void release_tmps(struct brw_wm_compile *c)
+/* Returns the current temporary-allocation cursor (c->tmp_index), to be
+ * handed back to release_tmps() later. */
+static int mark_tmps(struct brw_wm_compile *c)
+{
+   const int mark = c->tmp_index;
+
+   return mark;
+}
+
+/* Returns the vec8 GRF register recorded in temporary slot `index` of
+ * c->tmp_regs[].  The index is not range-checked. */
+static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index )
+{
+   const GLuint grf = c->tmp_regs[ index ];
+
+   return brw_vec8_grf( grf, 0 );
+}
+
+static void release_tmps(struct brw_wm_compile *c, int mark)
{
- c->tmp_index = 127;
+ c->tmp_index = mark;
}
static struct brw_reg
@@ -155,6 +176,68 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
src->NegateBase, src->Abs);
}
+/* Subroutines are minimal support for reusable instruction sequences.
+ They are implemented as simply as possible to minimise overhead: there
+ is no explicit support for communication between the caller and callee
+ other than saving the return address in a temporary register, nor is
+ there any automatic local storage. This implies that great care is
+ required before attempting reentrancy or any kind of nested
+ subroutine invocations.
+
+ invoke_subroutine() calls emit() to generate the body the first time a
+ given subroutine is requested, and records the instruction index at
+ which that body starts in c->subroutines[]. Later requests for the
+ same subroutine emit only a jump to the recorded index. A recorded
+ index of zero is treated as "not yet emitted". */
+static void invoke_subroutine( struct brw_wm_compile *c,
+ enum _subroutine subroutine,
+ void (*emit)( struct brw_wm_compile * ) )
+{
+ struct brw_compile *p = &c->func;
+
+ assert( subroutine < BRW_WM_MAX_SUBROUTINE );
+
+ if( c->subroutines[ subroutine ] ) {
+ /* subroutine previously emitted: reuse existing instructions.
+    The first ADD stores ip + ( 2 << 4 ) as the return address; the
+    second ADD adds ( recorded start - here - 1 ) << 4 to ip, which
+    transfers control to the existing body. Both are emitted with
+    mask control disabled. NOTE(review): the << 4 presumably converts
+    an instruction count to a byte offset -- confirm. */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ int here = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
+
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+ brw_imm_d( ( c->subroutines[ subroutine ] -
+ here - 1 ) << 4 ) );
+ brw_pop_insn_state(p);
+
+ release_tmps( c, mark );
+ } else {
+ /* previously unused subroutine: emit, and mark for later reuse.
+    The return-address ADD is emitted with a zero immediate because
+    the length of the body is not known yet; it is patched through
+    brw_set_src1() after the body and the returning MOV have been
+    emitted. */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ struct brw_instruction *calc;
+ int base = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) );
+ brw_pop_insn_state(p);
+
+ c->subroutines[ subroutine ] = p->nr_insn; /* first instruction of the body */
+
+ emit( c );
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV( p, brw_ip_reg(), return_address ); /* return: load ip from the saved address */
+ brw_pop_insn_state(p);
+
+ brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) ); /* patch the placeholder immediate */
+
+ release_tmps( c, mark );
+ }
+}
+
static void emit_abs( struct brw_wm_compile *c,
struct prog_instruction *inst)
{
@@ -778,6 +861,7 @@ static void emit_lrp(struct brw_wm_compile *c,
GLuint mask = inst->DstReg.WriteMask;
struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
int i;
+ int mark = mark_tmps(c);
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
dst = get_dst_reg(c, inst, i, 1);
@@ -804,7 +888,7 @@ static void emit_lrp(struct brw_wm_compile *c,
brw_MAC(p, dst, src0, tmp1);
brw_set_saturate(p, 0);
}
- release_tmps(c);
+ release_tmps(c, mark);
}
}
@@ -957,6 +1041,316 @@ static void emit_ddy(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
+/* Returns a view of `reg` retyped to BRW_REGISTER_TYPE_W, advanced by one
+ * sub-register element and re-strided with stride( ..., 0, 8, 2 ).
+ * Presumably this addresses the upper 16 bits of each 32-bit channel --
+ * confirm against the region-description rules. */
+static __inline struct brw_reg high_words( struct brw_reg reg )
+{
+   struct brw_reg words = retype( reg, BRW_REGISTER_TYPE_W );
+   struct brw_reg upper = suboffset( words, 1 );
+
+   return stride( upper, 0, 8, 2 );
+}
+
+/* Returns a view of `reg` retyped to BRW_REGISTER_TYPE_W and re-strided
+ * with stride( ..., 0, 8, 2 ), with no sub-register offset applied.
+ * Presumably this addresses the lower 16 bits of each 32-bit channel --
+ * confirm against the region-description rules. */
+static __inline struct brw_reg low_words( struct brw_reg reg )
+{
+   struct brw_reg words = retype( reg, BRW_REGISTER_TYPE_W );
+
+   return stride( words, 0, 8, 2 );
+}
+
+/* One- and two-dimensional Perlin noise, similar to the description in
+ _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3.
+
+ noise1_sub() is the body emitted for SUB_NOISE1. Its argument is not
+ passed explicitly: it is read from, and the result is written back
+ to, the temporary two slots below this routine's own mark (see the
+ lookup_tmp() call). Five further temporaries are allocated here and
+ released again before returning. */
+static void noise1_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param,
+ x0, x1, /* gradients at each end */
+ t, tmp[ 2 ], /* float temporaries */
+ itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0 = alloc_tmp( c );
+ x1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ tmp[ 0 ] = alloc_tmp( c );
+ tmp[ 1 ] = alloc_tmp( c );
+ itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD );
+ itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD );
+ itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD );
+ itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD );
+ itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD );
+
+ param = lookup_tmp( c, mark - 2 ); /* the caller's argument/result register */
+
+ brw_set_access_mode( p, BRW_ALIGN_1 ); /* NOTE(review): not restored in this function -- confirm callers expect ALIGN_1 */
+
+ brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+
+ /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
+ be hashed. Also compute the remainder (offset within the unit
+ length), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, itmp[ 0 ], param );
+ brw_FRC( p, param, param );
+ brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) );
+ brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+ brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+
+ /* We're now ready to perform the hashing. The two hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 32x16
+ bit multiplication, and 16-bit swizzles (which we get for
+ free). We can't use immediate operands in the multiplies,
+ because immediates are permitted only in src1 and the 16-bit
+ factor is permitted only in src0. Each of the three rounds
+ multiplies by one of the constants loaded above and then XORs
+ the high words into the low words. */
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+
+ /* Now we want to initialise the two gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 31 ), but
+ we correct for that right at the end. Note that x0, x1 and t
+ stop serving as hash constants (itmp2..itmp4) from here on. */
+ brw_ADD( p, t, param, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) );
+ brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) );
+
+ brw_MUL( p, x0, x0, param );
+ brw_MUL( p, x1, x1, t );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_MUL( p, param, tmp[ 0 ], param );
+ brw_MUL( p, x1, x1, param );
+ brw_ADD( p, x0, x0, x1 );
+ /* scale by pow( 2, -30 ), to compensate for the format conversion
+ above and an extra factor of 2 so that a single gradient covers
+ the [-1,1] range */
+ brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) );
+
+ release_tmps( c, mark );
+}
+
+static void emit_noise1( struct brw_wm_compile *c,
+ struct prog_instruction *inst )
+{ /* Emits code for a NOISE1 instruction: the source operand is copied
+   * into a fresh temporary, the shared SUB_NOISE1 subroutine is invoked
+   * on it, and the result is copied to the enabled destination channels. */
+ struct brw_compile *p = &c->func;
+ struct brw_reg src, param, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 ); /* noise1_sub() finds its argument by temporary slot position, so none may be live here */
+
+ src = get_src_reg( c, inst->SrcReg, 0, 1 );
+
+ param = alloc_tmp( c ); /* argument on entry to the subroutine, result on return */
+
+ brw_MOV( p, param, src );
+
+ invoke_subroutine( c, SUB_NOISE1, noise1_sub );
+
+ /* Fill in the result: the same value goes to every channel enabled
+    in the write mask, saturated when the instruction asks for it. */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV( p, dst, param );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+static void noise2_sub( struct brw_wm_compile *c ) {
+ /* Emits the instruction sequence for the two-input noise subroutine;
+ emit_noise2 hands this function to invoke_subroutine together with
+ SUB_NOISE2.
+
+ Inputs: the two temporaries looked up at mark - 3 and mark - 2
+ (param0 and param1 below). Output: the final value is written back
+ into param0, which is the register emit_noise2 copies to the
+ destination after the call. Both input registers are overwritten
+ along the way.
+
+ NOTE(review): the mark - 3 / mark - 2 offsets presumably skip one
+ temporary reserved by invoke_subroutine between the caller's two
+ parameters and our own mark -- confirm against invoke_subroutine.
+ The statement order is deliberately interleaved (see the comments
+ below), so do not reorder without care. */
+ struct brw_compile *p = &c->func;
+ struct brw_reg param0, param1,
+ x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */
+ t, tmp[ 4 ], /* float temporaries */
+ itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0y0 = alloc_tmp( c );
+ x0y1 = alloc_tmp( c );
+ x1y0 = alloc_tmp( c );
+ x1y1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ for( i = 0; i < 4; i++ ) {
+ tmp[ i ] = alloc_tmp( c );
+ itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+ }
+ itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD ); /* itmp[4..6] share
+ registers with three of the gradient temporaries. They only hold
+ the hash multiplier constants, and are last read by the hashing
+ loops before the gradients are first written. */
+ itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD );
+ itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD );
+ /* Pick up the two input registers that the caller filled in before
+ invoking the subroutine. */
+ param0 = lookup_tmp( c, mark - 3 );
+ param1 = lookup_tmp( c, mark - 2 );
+ /* NOTE(review): the access mode is set here and not changed again
+ anywhere in this function. */
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
+ be hashed. Also compute the remainders (offsets within the unit
+ square), interleaved to reduce register dependency penalties.
+ After this section param0 and param1 hold only the fractional
+ offsets. */
+ brw_RNDD( p, itmp[ 0 ], param0 );
+ brw_RNDD( p, itmp[ 1 ], param1 );
+ brw_FRC( p, param0, param0 );
+ brw_FRC( p, param1, param1 );
+ brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+ brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ),
+ low_words( itmp[ 1 ] ) ); /* fold the param1 coordinate into the
+ high word of itmp[0], so one dword
+ carries both coordinates (presumably
+ high_words/low_words select the 16-bit
+ halves of each dword) */
+ brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+ brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+ brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); /* high word + 1: the x0y1 corner (overwrites the rounded param1) */
+ brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); /* low word + 1: the x1y0 corner */
+ brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); /* both words + 1: the x1y1 corner */
+
+ /* We're now ready to perform the hashing. The four hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 32x16
+ bit multiplication, and 16-bit swizzles (which we get for
+ free). We can't use immediate operands in the multiplies,
+ because immediates are permitted only in src1 and the 16-bit
+ factor is permitted only in src0. That is why the three
+ multipliers were loaded into itmp[4..6] above. Each round is a
+ multiply followed by XORing the high word into the low word. */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+
+ /* Now we want to initialise the four gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 15 ), but
+ we correct for that right at the end. */
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+ brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) );
+ brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) );
+ /* The low word of each hash (moved above) is the gradient component
+ that gets multiplied by the param0 offset; the high word (moved
+ below) is the component multiplied by the param1 offset. */
+ brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) );
+ /* t currently holds param0 - 1.0, the offset used for the two x1
+ corners; it is then reloaded with param1 - 1.0 for the two y1
+ corners. */
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 );
+ brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t );
+ brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t );
+ /* Sum the two products per corner; note the tmp[] indices follow
+ the corner numbering (1 = x0y1, 2 = x1y0). */
+ brw_ADD( p, x0y0, x0y0, tmp[ 0 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 2 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 1 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 3 ] );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). It is evaluated below as
+ ( ( 6t - 15 ) * t + 10 ) * t * t * t, once for each of the two
+ offsets, with tmp[0] and tmp[1] as accumulators. */
+ brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) );
+ brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_MUL( p, param0, tmp[ 0 ], param0 );
+ brw_MUL( p, param1, tmp[ 1 ], param1 );
+ /* param0 and param1 now hold the interpolation weights rather than
+ the raw offsets; x0y1 and x1y1 already hold the y1 - y0
+ differences computed by the two "filler" ADDs above. */
+ /* Here we interpolate in the y dimension... */
+ brw_MUL( p, x0y1, x0y1, param1 );
+ brw_MUL( p, x1y1, x1y1, param1 );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. There are horrible register dependencies here,
+ but we have nothing else to do. */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, param0 );
+ brw_ADD( p, x0y0, x0y0, x1y0 );
+
+ /* scale by pow( 2, -15 ), as described above; the result lands in
+ param0, which is where the caller reads it */
+ brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) );
+
+ release_tmps( c, mark );
+}
+
+/**
+ * Emit code for a NOISE2 instruction.
+ *
+ * Two source operands are fetched with get_src_reg() and copied into two
+ * freshly allocated temporaries, noise2_sub is called through
+ * invoke_subroutine(), and the value left in the first temporary is then
+ * moved into every destination channel enabled in the write mask.
+ * Saturation is enabled around the result moves when the instruction asks
+ * for SATURATE_ZERO_ONE and switched off again afterwards.
+ *
+ * The temporary mark is asserted to be 0 on entry, and release_tmps() is
+ * called with that mark before returning.
+ *
+ * \param c     fragment program compile state
+ * \param inst  the NOISE2 instruction being translated
+ */
+static void emit_noise2( struct brw_wm_compile *c,
+                         struct prog_instruction *inst )
+{
+   struct brw_compile *p = &c->func;
+   const GLuint writemask = inst->DstReg.WriteMask;
+   const int saturate = ( inst->SaturateMode == SATURATE_ZERO_ONE );
+   const int mark = mark_tmps( c );
+   struct brw_reg in0, in1, arg0, arg1;
+   int chan;
+
+   assert( mark == 0 );
+
+   /* Fetch the operands first, then allocate the argument temporaries
+      the subroutine will look up. */
+   in0 = get_src_reg( c, inst->SrcReg, 0, 1 );
+   in1 = get_src_reg( c, inst->SrcReg, 1, 1 );
+
+   arg0 = alloc_tmp( c );
+   arg1 = alloc_tmp( c );
+   brw_MOV( p, arg0, in0 );
+   brw_MOV( p, arg1, in1 );
+
+   invoke_subroutine( c, SUB_NOISE2, noise2_sub );
+
+   /* The subroutine leaves its result in the first argument register;
+      replicate it to each enabled destination channel. */
+   brw_set_saturate( p, saturate );
+   for( chan = 0; chan < 4; chan++ ) {
+      if( !( writemask & ( 1 << chan ) ) )
+         continue;
+      brw_MOV( p, get_dst_reg( c, inst, chan, 1 ), arg0 );
+   }
+   if( saturate )
+      brw_set_saturate( p, 0 );
+
+   release_tmps( c, mark );
+}
+
static void emit_wpos_xy(struct brw_wm_compile *c,
struct prog_instruction *inst)
{
@@ -1276,6 +1670,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_MAD:
emit_mad(c, inst);
break;
+ case OPCODE_NOISE1:
+ emit_noise1(c, inst);
+ break;
+ case OPCODE_NOISE2:
+ emit_noise2(c, inst);
+ break;
+ /* case OPCODE_NOISE3: */
+ /* case OPCODE_NOISE4: */
+ /* not yet implemented */
case OPCODE_TEX:
emit_tex(c, inst);
break;
@@ -1368,7 +1771,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
brw_wm_pass_fp(c);
- c->tmp_index = 127;
brw_wm_emit_glsl(brw, c);
c->prog_data.total_grf = c->reg_index;
c->prog_data.total_scratch = 0;