path: root/src/mesa/drivers/dri/i965/brw_misc_state.c
author		Eric Anholt <eric@anholt.net>	2007-12-07 16:15:49 -0800
committer	Eric Anholt <eric@anholt.net>	2007-12-07 16:19:10 -0800
commit		e3a6e60040b7f6ea7965e52f8f9881ed31e0347c (patch)
tree		6fadf576b904bb479936a355ed716ca305556848 /src/mesa/drivers/dri/i965/brw_misc_state.c
parent		3ecdae82d751f9f404d10332f030e3280949ce4e (diff)
[965] Convert the driver to dri_bufmgr interface and enable TTM.
This is currently believed to work, but with a significant performance loss.
Performance recovery should be soon to follow.

The dri_bo_fake_disable_backing_store() call was added to allow disabling
backing store the way bufmgr_fake.c did, which is a significant performance
win (though it's missing the no-fence-subdata part).

This commit is a squash merge of the 965-ttm branch, which had some history
I wanted to avoid pulling due to noisiness and brokenness at many points,
which would have hurt git-bisecting.
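For context, a minimal sketch of how the backing-store disable mentioned above is typically hooked up. The invalidate callback, its body, and the include set are assumptions for illustration; only dri_bo_fake_disable_backing_store() itself is named by this commit.

#include "brw_context.h"
#include "dri_bufmgr.h"

/* Hedged sketch, not part of this patch: disabling a fake-bufmgr buffer's
 * backing store so writes go straight to the aperture mapping instead of a
 * malloc'd shadow copy.  The invalidate callback is run when the buffer's
 * storage is thrown away, so state derived from it can be flagged for
 * re-upload.
 */
static void
example_invalidate_cb(dri_bo *bo, void *ptr)	/* hypothetical callback */
{
   struct brw_context *brw = ptr;

   /* Assumed: mark context state stale so it is re-emitted. */
   brw->state.dirty.brw |= BRW_NEW_CONTEXT;
}

static void
example_disable_backing_store(struct brw_context *brw, dri_bo *bo)
{
   /* Assumed signature: (buffer, invalidate callback, callback data). */
   dri_bo_fake_disable_backing_store(bo, example_invalidate_cb, brw);
}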
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_misc_state.c')
-rw-r--r--  src/mesa/drivers/dri/i965/brw_misc_state.c  143
1 file changed, 60 insertions, 83 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index d5779680ff..210745c63b 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -123,21 +123,18 @@ const struct brw_tracked_state brw_drawing_rect = {
.update = upload_drawing_rect
};
-/***********************************************************************
- * Binding table pointers
+/**
+ * Upload the binding table pointers, which point to each stage's array of
+ * surface state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is the BRW_SS_POOL cache buffer.
*/
-
static void upload_binding_table_pointers(struct brw_context *brw)
{
struct brw_binding_table_pointers btp;
memset(&btp, 0, sizeof(btp));
- /* The binding table has been emitted to the SS pool already, so we
- * know what its offset is. When the batch buffer is fired, the
- * binding table and surface structs will get fixed up to point to
- * where the textures actually landed, but that won't change the
- * value of the offsets here:
- */
btp.header.opcode = CMD_BINDING_TABLE_PTRS;
btp.header.length = sizeof(btp)/4 - 2;
btp.vs = 0;
@@ -159,11 +156,12 @@ const struct brw_tracked_state brw_binding_table_pointers = {
};
-/***********************************************************************
- * Pipelined state pointers. This is the key state packet from which
- * the hardware chases pointers to all the uploaded state in VRAM.
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer.
*/
-
static void upload_pipelined_state_pointers(struct brw_context *brw )
{
struct brw_pipelined_state_pointers psp;
@@ -233,71 +231,53 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
.update = upload_psp_urb_cbs
};
-
-
-
-/***********************************************************************
- * Depthbuffer - currently constant, but rotation would change that.
+/**
+ * Upload the depthbuffer offset and format.
+ *
+ * We have to do this per state validation as we need to emit the relocation
+ * in the batch buffer.
*/
-
static void upload_depthbuffer(struct brw_context *brw)
{
- /* 0x79050003 Depth Buffer */
struct intel_context *intel = &brw->intel;
struct intel_region *region = brw->state.depth_region;
- struct brw_depthbuffer bd;
- memset(&bd, 0, sizeof(bd));
- bd.header.bits.opcode = CMD_DEPTH_BUFFER;
- bd.header.bits.length = sizeof(bd)/4-2;
- bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1;
-
+ unsigned int format;
+
switch (region->cpp) {
case 2:
- bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM;
+ format = BRW_DEPTHFORMAT_D16_UNORM;
break;
case 4:
if (intel->depth_buffer_is_float)
- bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT;
+ format = BRW_DEPTHFORMAT_D32_FLOAT;
else
- bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+ format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
break;
default:
assert(0);
return;
}
- bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */
-
- /* The depthbuffer can only use YMAJOR tiling... This is a bit of
- * a shame as it clashes with the 2d blitter which only supports
- * XMAJOR tiling...
- */
- bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR;
- bd.dword1.bits.tiled_surface = intel->depth_region->tiled;
- bd.dword1.bits.surface_type = BRW_SURFACE_2D;
-
- /* BRW_NEW_LOCK */
- bd.dword2_base_addr = bmBufferOffset(intel, region->buffer);
-
- bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
- bd.dword3.bits.lod = 0;
- bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? */
- bd.dword3.bits.height = region->height - 1;
-
- bd.dword4.bits.min_array_element = 0;
- bd.dword4.bits.depth = 0;
-
- BRW_CACHED_BATCH_STRUCT(brw, &bd);
+ BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2));
+ OUT_BATCH(((region->pitch * region->cpp) - 1) |
+ (format << 18) |
+ (BRW_TILEWALK_YMAJOR << 26) |
+ (region->tiled << 27) |
+ (BRW_SURFACE_2D << 29));
+ OUT_RELOC(region->buffer,
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0);
+ OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+ ((region->pitch - 1) << 6) |
+ ((region->height - 1) << 19));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
}
const struct brw_tracked_state brw_depthbuffer = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
- .cache = 0
- },
- .update = upload_depthbuffer
+ .update = upload_depthbuffer,
+ .always_update = GL_TRUE,
};
@@ -494,40 +474,37 @@ const struct brw_tracked_state brw_invarient_state = {
.update = upload_invarient_state
};
-
-/* State pool addresses:
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools. This comes at the expense of memory, and more expensive cache
+ * misses.
*/
static void upload_state_base_address( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
- struct brw_state_base_address sba;
-
- memset(&sba, 0, sizeof(sba));
-
- sba.header.opcode = CMD_STATE_BASE_ADDRESS;
- sba.header.length = 0x4;
-
- /* BRW_NEW_LOCK */
- sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5;
- sba.bits0.modify_enable = 1;
-
- /* BRW_NEW_LOCK */
- sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5;
- sba.bits1.modify_enable = 1;
- sba.bits2.modify_enable = 1;
- sba.bits3.modify_enable = 1;
- sba.bits4.modify_enable = 1;
-
- BRW_CACHED_BATCH_STRUCT(brw, &sba);
+ /* Output the structure (brw_state_base_address) directly to the
+ * batchbuffer, so we can emit relocations inline.
+ */
+ BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+ OUT_RELOC(brw->pool[BRW_GS_POOL].buffer,
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ 1); /* General state base address */
+ OUT_RELOC(brw->pool[BRW_SS_POOL].buffer,
+ DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+ 1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ ADVANCE_BATCH();
}
const struct brw_tracked_state brw_state_base_address = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
- .cache = 0
- },
+ .always_update = GL_TRUE,
.update = upload_state_base_address
};