summary refs log tree commit diff
path: root/src/mesa/drivers/dri/i965/brw_curbe.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_curbe.c')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_curbe.c 185
1 files changed, 86 insertions, 99 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 3f0aaa1f86..4ec0cfb816 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -31,10 +31,10 @@
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
-#include "enums.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
@@ -42,13 +42,13 @@
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_util.h"
-#include "brw_aub.h"
/* Partition the CURBE between the various users of constant values:
*/
static void calculate_curbe_offsets( struct brw_context *brw )
{
+ GLcontext *ctx = &brw->intel.ctx;
/* CACHE_NEW_WM_PROG */
GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
@@ -59,8 +59,8 @@ static void calculate_curbe_offsets( struct brw_context *brw )
GLuint total_regs;
/* _NEW_TRANSFORM */
- if (brw->attribs.Transform->ClipPlanesEnabled) {
- GLuint nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
+ if (ctx->Transform.ClipPlanesEnabled) {
+ GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled);
nr_clip_regs = (nr_planes * 4 + 15) / 16;
}
@@ -90,7 +90,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
*/
if (nr_fp_regs > brw->curbe.wm_size ||
nr_vp_regs > brw->curbe.vs_size ||
- nr_clip_regs > brw->curbe.clip_size ||
+ nr_clip_regs != brw->curbe.clip_size ||
(total_regs < brw->curbe.total_size / 4 &&
brw->curbe.total_size > 16)) {
@@ -127,7 +127,7 @@ const struct brw_tracked_state brw_curbe_offsets = {
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = CACHE_NEW_WM_PROG
},
- .update = calculate_curbe_offsets
+ .prepare = calculate_curbe_offsets
};
@@ -138,37 +138,25 @@ const struct brw_tracked_state brw_curbe_offsets = {
* fixed-function hardware in a double-buffering scheme to avoid a
* pipeline stall each time the contents of the curbe is changed.
*/
-void brw_upload_constant_buffer_state(struct brw_context *brw)
+void brw_upload_cs_urb_state(struct brw_context *brw) /* fills a brw_cs_urb_state packet from brw->urb and passes it to BRW_CACHED_BATCH_STRUCT */
{
- struct brw_constant_buffer_state cbs;
- memset(&cbs, 0, sizeof(cbs));
+ struct brw_cs_urb_state cs_urb;
+ memset(&cs_urb, 0, sizeof(cs_urb)); /* every field not assigned below stays zero */
/* It appears that this is the state packet for the CS unit, ie. the
* urb entries detailed here are housed in the CS range from the
* URB_FENCE command.
*/
- cbs.header.opcode = CMD_CONST_BUFFER_STATE;
- cbs.header.length = sizeof(cbs)/4 - 2;
+ cs_urb.header.opcode = CMD_CS_URB_STATE;
+ cs_urb.header.length = sizeof(cs_urb)/4 - 2; /* struct size in 4-byte units, minus two */
/* BRW_NEW_URB_FENCE */
- cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
- cbs.bits0.urb_entry_size = brw->urb.csize - 1;
+ cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+ cs_urb.bits0.urb_entry_size = brw->urb.csize - 1; /* field is written as csize minus one */
assert(brw->urb.nr_cs_entries);
- BRW_CACHED_BATCH_STRUCT(brw, &cbs);
-}
-
-#if 0
-const struct brw_tracked_state brw_constant_buffer_state = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_URB_FENCE,
- .cache = 0
- },
- .update = brw_upload_constant_buffer_state
-};
-#endif
-
+ BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+}
static GLfloat fixed_plane[6][4] = {
{ 0, 0, -1, 1 },
@@ -183,12 +171,11 @@ static GLfloat fixed_plane[6][4] = {
* cache mechanism, but maybe would benefit from a comparison against
* the current uploaded set of constants.
*/
-static void upload_constant_buffer(struct brw_context *brw)
+static void prepare_constant_buffer(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
- struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
GLuint sz = brw->curbe.total_size;
GLuint bufsz = sz * 16 * sizeof(GLfloat);
GLfloat *buf;
@@ -198,24 +185,17 @@ static void upload_constant_buffer(struct brw_context *brw)
* function will also be called whenever fp or vp changes.
*/
brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
- brw->curbe.tracked_state.dirty.mesa |= vp->param_state;
- brw->curbe.tracked_state.dirty.mesa |= fp->param_state;
+ brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags;
+ brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
if (sz == 0) {
- struct brw_constant_buffer cb;
- cb.header.opcode = CMD_CONST_BUFFER;
- cb.header.length = sizeof(cb)/4 - 2;
- cb.header.valid = 0;
- cb.bits0.buffer_length = 0;
- cb.bits0.buffer_address = 0;
- BRW_BATCH_STRUCT(brw, &cb);
if (brw->curbe.last_buf) {
free(brw->curbe.last_buf);
brw->curbe.last_buf = NULL;
brw->curbe.last_bufsz = 0;
}
-
+
return;
}
@@ -254,11 +234,11 @@ static void upload_constant_buffer(struct brw_context *brw)
*/
assert(MAX_CLIP_PLANES == 6);
for (j = 0; j < MAX_CLIP_PLANES; j++) {
- if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) {
- buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0];
- buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1];
- buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2];
- buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3];
+ if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
+ buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
+ buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
+ buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
+ buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
i++;
}
}
@@ -290,11 +270,11 @@ static void upload_constant_buffer(struct brw_context *brw)
brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
}
- if (brw->curbe.last_buf &&
+ if (brw->curbe.curbe_bo != NULL &&
+ brw->curbe.last_buf &&
bufsz == brw->curbe.last_bufsz &&
memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
free(buf);
-/* return; */
}
else {
if (brw->curbe.last_buf)
@@ -302,61 +282,66 @@ static void upload_constant_buffer(struct brw_context *brw)
brw->curbe.last_buf = buf;
brw->curbe.last_bufsz = bufsz;
-
- if (!brw_pool_alloc(pool,
- bufsz,
- 6,
- &brw->curbe.gs_offset)) {
- _mesa_printf("out of GS memory for curbe\n");
- assert(0);
- return;
+ if (brw->curbe.curbe_bo != NULL &&
+ (brw->curbe.need_new_bo ||
+ brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
+ {
+ dri_bo_unreference(brw->curbe.curbe_bo);
+ brw->curbe.curbe_bo = NULL;
+ }
+
+ if (brw->curbe.curbe_bo == NULL) {
+ /* Allocate a single page for CURBE entries for this batchbuffer.
+ * They're generally around 64b.
+ */
+ brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
+ 4096, 1 << 6);
+ brw->curbe.curbe_next_offset = 0;
}
-
+
+ brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+ brw->curbe.curbe_next_offset += bufsz;
+ brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);
/* Copy data to the buffer:
*/
- bmBufferSubDataAUB(&brw->intel,
- pool->buffer,
- brw->curbe.gs_offset,
- bufsz,
- buf,
- DW_CONSTANT_BUFFER,
- 0);
+ dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
}
- /* TODO: only emit the constant_buffer packet when necessary, ie:
- - contents have changed
- - offset has changed
- - hw requirements due to other packets emitted.
- */
- {
- struct brw_constant_buffer cb;
-
- memset(&cb, 0, sizeof(cb));
-
- cb.header.opcode = CMD_CONST_BUFFER;
- cb.header.length = sizeof(cb)/4 - 2;
- cb.header.valid = 1;
- cb.bits0.buffer_length = sz - 1;
- cb.bits0.buffer_address = brw->curbe.gs_offset >> 6;
-
- /* Because this provokes an action (ie copy the constants into the
- * URB), it shouldn't be shortcircuited if identical to the
- * previous time - because eg. the urb destination may have
- * changed, or the urb contents different to last time.
- *
- * Note that the data referred to is actually copied internally,
- * not just used in place according to passed pointer.
- *
- * It appears that the CS unit takes care of using each available
- * URB entry (Const URB Entry == CURBE) in turn, and issuing
- * flushes as necessary when doublebuffering of CURBEs isn't
- * possible.
- */
-/* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */
- BRW_BATCH_STRUCT(brw, &cb);
-/* intel_batchbuffer_align(brw->intel.batch, 64, 0); */
+ brw_add_validated_bo(brw, brw->curbe.curbe_bo);
+
+ /* Because this provokes an action (ie copy the constants into the
+ * URB), it shouldn't be shortcircuited if identical to the
+ * previous time - because eg. the urb destination may have
+ * changed, or the urb contents different to last time.
+ *
+ * Note that the data referred to is actually copied internally,
+ * not just used in place according to passed pointer.
+ *
+ * It appears that the CS unit takes care of using each available
+ * URB entry (Const URB Entry == CURBE) in turn, and issuing
+ * flushes as necessary when doublebuffering of CURBEs isn't
+ * possible.
+ */
+}
+
+/* Emit the two-entry CMD_CONST_BUFFER packet for the current CURBE.
+ *
+ * Reads brw->curbe.total_size and, when it is non-zero, also
+ * brw->curbe.curbe_bo and brw->curbe.curbe_offset.  It is installed as
+ * the .emit hook of brw_constant_buffer, alongside
+ * prepare_constant_buffer as the .prepare hook.
+ *
+ * NOTE(review): the zero-size path replaces the struct-based packet the
+ * old upload code built with header.valid = 0 and a zero length/address
+ * word; bit 8 of the header presumably is that "valid" flag -- confirm
+ * against the packet definition.
+ */
+static void emit_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel; /* not referenced directly here; presumably used by the batch macros -- confirm */
+ GLuint sz = brw->curbe.total_size;
+
+ BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+ if (sz == 0) {
+ OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); /* bit 8 left clear in this case */
+ OUT_BATCH(0);
+ } else {
+ OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); /* same header with bit 8 set */
+ OUT_RELOC(brw->curbe.curbe_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ (sz - 1) + brw->curbe.curbe_offset); /* curbe_offset comes from a 64-aligned running offset, so (sz - 1) lands in the low bits -- assumes sz - 1 < 64, TODO confirm */
 }
+ ADVANCE_BATCH();
 }
/* This tracked state is unique in that the state it monitors varies
@@ -372,9 +357,11 @@ const struct brw_tracked_state brw_constant_buffer = {
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
- BRW_NEW_CURBE_OFFSETS),
+ BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_BATCH),
.cache = (CACHE_NEW_WM_PROG)
},
- .update = upload_constant_buffer
+ .prepare = prepare_constant_buffer,
+ .emit = emit_constant_buffer,
};