summary refs log tree commit diff
path: root/src/mesa/pipe
diff options
context:
space:
mode:
author Ben Skeggs <skeggsb@gmail.com> 2008-02-03 12:08:31 +1100
committer Ben Skeggs <skeggsb@gmail.com> 2008-02-15 13:50:29 +1100
commit 705022f98c32c44b94411ea13dfe4cbc899f5a77 (patch)
tree de1031e64b702e63faf0099c03bb520b3c5b00e0 /src/mesa/pipe
parent 8bbedc3f4b7b281a60286ba573077a6e3e659f63 (diff)
nouveau: avoid relocations where possible.
Potential relocations are emitted as NOPs where they're needed. In the event that a buffer moves, the pushbuf code will emit the relevant state changes into the NOPs. This is just a start; more work is needed to get this looking how I want it to.
Diffstat (limited to 'src/mesa/pipe')
-rw-r--r-- src/mesa/pipe/nouveau/nouveau_bo.h 1
-rw-r--r-- src/mesa/pipe/nouveau/nouveau_push.h 13
-rw-r--r-- src/mesa/pipe/nv40/nv40_fragprog.c 5
-rw-r--r-- src/mesa/pipe/nv40/nv40_fragtex.c 8
-rw-r--r-- src/mesa/pipe/nv40/nv40_state.c 32
-rw-r--r-- src/mesa/pipe/nv40/nv40_state_emit.c 144
6 files changed, 131 insertions, 72 deletions
diff --git a/src/mesa/pipe/nouveau/nouveau_bo.h b/src/mesa/pipe/nouveau/nouveau_bo.h
index 2b57ee9263..18020e9c65 100644
--- a/src/mesa/pipe/nouveau/nouveau_bo.h
+++ b/src/mesa/pipe/nouveau/nouveau_bo.h
@@ -35,6 +35,7 @@
#define NOUVEAU_BO_HIGH (1 << 7)
#define NOUVEAU_BO_OR (1 << 8)
#define NOUVEAU_BO_LOCAL (1 << 9)
+#define NOUVEAU_BO_DUMMY (1 << 31)
struct nouveau_bo {
struct nouveau_device *device;
diff --git a/src/mesa/pipe/nouveau/nouveau_push.h b/src/mesa/pipe/nouveau/nouveau_push.h
index 117e3535cf..679472669b 100644
--- a/src/mesa/pipe/nouveau/nouveau_push.h
+++ b/src/mesa/pipe/nouveau/nouveau_push.h
@@ -44,9 +44,8 @@
#define OUT_RELOC(bo,data,flags,vor,tor) do { \
NOUVEAU_PUSH_CONTEXT(pc); \
pc->nvws->push_reloc(pc->nvws->channel, \
- pc->nvws->channel->pushbuf->cur, \
+ pc->nvws->channel->pushbuf->cur++, \
(bo), (data), (flags), (vor), (tor)); \
- OUT_RING(0); \
} while(0)
/* Raw data + flags depending on FB/TT buffer */
@@ -71,4 +70,14 @@
OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
} while(0)
+/* A reloc which'll recombine into a NV_DMA_METHOD packet header */
+#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \
+ NOUVEAU_PUSH_CONTEXT(pc); \
+ if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \
+ pc->nvws->push_flush(pc->nvws->channel, ((size) + 1)); \
+ OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \
+ (flags), 0, 0); \
+ pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \
+} while(0)
+
#endif
diff --git a/src/mesa/pipe/nv40/nv40_fragprog.c b/src/mesa/pipe/nv40/nv40_fragprog.c
index 14897f9798..667eb89cb2 100644
--- a/src/mesa/pipe/nv40/nv40_fragprog.c
+++ b/src/mesa/pipe/nv40/nv40_fragprog.c
@@ -815,6 +815,11 @@ nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp)
fp->on_hw = TRUE;
}
+ BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
+ OUT_RELOC (fp->buffer, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
+ NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0,
+ NV40TCL_FP_ADDRESS_DMA1);
BEGIN_RING(curie, NV40TCL_FP_CONTROL, 1);
OUT_RING (fp->fp_control);
diff --git a/src/mesa/pipe/nv40/nv40_fragtex.c b/src/mesa/pipe/nv40/nv40_fragtex.c
index 48d6eb629f..7c5ecd5c56 100644
--- a/src/mesa/pipe/nv40/nv40_fragtex.c
+++ b/src/mesa/pipe/nv40/nv40_fragtex.c
@@ -104,7 +104,13 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
nv40->tex[unit].buffer = nv40mt->buffer;
nv40->tex[unit].format = txf;
- BEGIN_RING(curie, NV40TCL_TEX_WRAP(unit), 6);
+ BEGIN_RING(curie, NV40TCL_TEX_OFFSET(unit), 8);
+ OUT_RELOCl(nv40->tex[unit].buffer, 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(nv40->tex[unit].buffer, nv40->tex[unit].format,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+ NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
+ NV40TCL_TEX_FORMAT_DMA1);
OUT_RING (ps->wrap);
OUT_RING (NV40TCL_TEX_ENABLE_ENABLE | ps->en |
(0x00078000) /* mipmap related? */);
diff --git a/src/mesa/pipe/nv40/nv40_state.c b/src/mesa/pipe/nv40/nv40_state.c
index c619948b55..bb435b106b 100644
--- a/src/mesa/pipe/nv40/nv40_state.c
+++ b/src/mesa/pipe/nv40/nv40_state.c
@@ -603,33 +603,51 @@ nv40_set_framebuffer_state(struct pipe_context *pipe,
}
if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
- BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 1);
- OUT_RING (rt[0]->pitch * rt[0]->cpp);
nv40->rt[0] = rt[0]->buffer;
+ BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
+ OUT_RELOCo(nv40->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 2);
+ OUT_RING (rt[0]->pitch * rt[0]->cpp);
+ OUT_RELOCl(nv40->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
- BEGIN_RING(curie, NV40TCL_COLOR1_PITCH, 2);
- OUT_RING (rt[1]->pitch * rt[1]->cpp);
nv40->rt[1] = rt[1]->buffer;
+ BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
+ OUT_RELOCo(nv40->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 2);
+ OUT_RELOCl(nv40->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RING (rt[1]->pitch * rt[1]->cpp);
}
if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
+ nv40->rt[2] = rt[2]->buffer;
+ BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
+ OUT_RELOCo(nv40->rt[2], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
+ OUT_RELOCl(nv40->rt[2], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR2_PITCH, 1);
OUT_RING (rt[2]->pitch * rt[2]->cpp);
- nv40->rt[2] = rt[2]->buffer;
}
if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
+ nv40->rt[3] = rt[3]->buffer;
+ BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
+ OUT_RELOCo(nv40->rt[3], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
+ OUT_RELOCl(nv40->rt[3], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_COLOR3_PITCH, 1);
OUT_RING (rt[3]->pitch * rt[3]->cpp);
- nv40->rt[3] = rt[3]->buffer;
}
if (zeta_format) {
+ nv40->zeta = zeta->buffer;
+ BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
+ OUT_RELOCo(nv40->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(nv40->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(curie, NV40TCL_ZETA_PITCH, 1);
OUT_RING (zeta->pitch * zeta->cpp);
- nv40->zeta = zeta->buffer;
}
nv40->rt_enable = rt_enable;
diff --git a/src/mesa/pipe/nv40/nv40_state_emit.c b/src/mesa/pipe/nv40/nv40_state_emit.c
index c9a7a2e364..66b98d5fab 100644
--- a/src/mesa/pipe/nv40/nv40_state_emit.c
+++ b/src/mesa/pipe/nv40/nv40_state_emit.c
@@ -1,94 +1,114 @@
#include "nv40_context.h"
#include "nv40_state.h"
-void
-nv40_emit_hw_state(struct nv40_context *nv40)
+/* Emit relocs for every referenced buffer.
+ *
+ * This is to ensure the bufmgr has an accurate idea of how
+ * the buffer is used. These relocs appear in the push buffer as
+ * NOPs, and will only be turned into state changes if a buffer
+ * actually moves.
+ */
+static void
+nv40_state_emit_dummy_relocs(struct nv40_context *nv40)
{
- int i;
-
- if (nv40->dirty & NV40_NEW_FRAGPROG) {
- nv40_fragprog_bind(nv40, nv40->fragprog.current);
- /*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */
- }
-
- if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) {
- nv40_fragtex_bind(nv40);
-
- BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
- OUT_RING (2);
- BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
- OUT_RING (1);
- nv40->dirty &= ~NV40_NEW_FRAGPROG;
- }
-
- if (nv40->dirty & NV40_NEW_VERTPROG) {
- nv40_vertprog_bind(nv40, nv40->vertprog.current);
- nv40->dirty &= ~NV40_NEW_VERTPROG;
- }
-
- nv40->dirty_samplers = 0;
-
- /* Emit relocs for every referenced buffer.
- * This is to ensure the bufmgr has an accurate idea of how
- * the buffer is used. This isn't very efficient, but we don't
- * seem to take a significant performance hit. Will be improved
- * at some point. Vertex arrays are emitted by nv40_vbo.c
- */
+ unsigned rt_flags, tx_flags, fp_flags;
+ int i;
+
+ rt_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR | NOUVEAU_BO_DUMMY;
+ tx_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+ NOUVEAU_BO_DUMMY;
+ fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
+ NOUVEAU_BO_DUMMY;
/* Render targets */
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
- BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
- OUT_RELOCo(nv40->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(curie, NV40TCL_COLOR0_OFFSET, 1);
- OUT_RELOCl(nv40->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCm(nv40->rt[0], rt_flags,
+ curie, NV40TCL_DMA_COLOR0, 1);
+ OUT_RELOCo(nv40->rt[0], rt_flags);
+ OUT_RELOCm(nv40->rt[0], rt_flags,
+ curie, NV40TCL_COLOR0_OFFSET, 1);
+ OUT_RELOCl(nv40->rt[0], 0, rt_flags);
}
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
- BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
- OUT_RELOCo(nv40->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 1);
- OUT_RELOCl(nv40->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCm(nv40->rt[1], rt_flags,
+ curie, NV40TCL_DMA_COLOR1, 1);
+ OUT_RELOCo(nv40->rt[1], rt_flags);
+ OUT_RELOCm(nv40->rt[1], rt_flags,
+ curie, NV40TCL_COLOR1_OFFSET, 1);
+ OUT_RELOCl(nv40->rt[1], 0, rt_flags);
}
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
- BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
- OUT_RELOCo(nv40->rt[2], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
- OUT_RELOCl(nv40->rt[2], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCm(nv40->rt[2], rt_flags,
+ curie, NV40TCL_DMA_COLOR2, 1);
+ OUT_RELOCo(nv40->rt[2], rt_flags);
+ OUT_RELOCm(nv40->rt[2], rt_flags,
+ curie, NV40TCL_COLOR2_OFFSET, 1);
+ OUT_RELOCl(nv40->rt[2], 0, rt_flags);
}
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
- BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
- OUT_RELOCo(nv40->rt[3], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
- OUT_RELOCl(nv40->rt[3], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCm(nv40->rt[3], rt_flags,
+ curie, NV40TCL_DMA_COLOR3, 1);
+ OUT_RELOCo(nv40->rt[3], rt_flags);
+ OUT_RELOCm(nv40->rt[3], rt_flags,
+ curie, NV40TCL_COLOR3_OFFSET, 1);
+ OUT_RELOCl(nv40->rt[3], 0, rt_flags);
}
if (nv40->zeta) {
- BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
- OUT_RELOCo(nv40->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
- OUT_RELOCl(nv40->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCm(nv40->zeta, rt_flags, curie, NV40TCL_DMA_ZETA, 1);
+ OUT_RELOCo(nv40->zeta, rt_flags);
+ OUT_RELOCm(nv40->zeta, rt_flags, curie, NV40TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(nv40->zeta, 0, rt_flags);
}
/* Texture images */
for (i = 0; i < 16; i++) {
if (!(nv40->fp_samplers & (1 << i)))
continue;
- BEGIN_RING(curie, NV40TCL_TEX_OFFSET(i), 2);
- OUT_RELOCl(nv40->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCm(nv40->tex[i].buffer, tx_flags,
+ curie, NV40TCL_TEX_OFFSET(i), 2);
+ OUT_RELOCl(nv40->tex[i].buffer, 0, tx_flags);
OUT_RELOCd(nv40->tex[i].buffer, nv40->tex[i].format,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
- NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
+ tx_flags | NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
NV40TCL_TEX_FORMAT_DMA1);
}
/* Fragment program */
- BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
- OUT_RELOC (nv40->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
- NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0,
- NV40TCL_FP_ADDRESS_DMA1);
+ OUT_RELOCm(nv40->fragprog.active->buffer, fp_flags,
+ curie, NV40TCL_FP_ADDRESS, 1);
+ OUT_RELOC (nv40->fragprog.active->buffer, 0,
+ fp_flags | NOUVEAU_BO_OR | NOUVEAU_BO_LOW,
+ NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
+}
+
+void
+nv40_emit_hw_state(struct nv40_context *nv40)
+{
+ if (nv40->dirty & NV40_NEW_FRAGPROG) {
+ nv40_fragprog_bind(nv40, nv40->fragprog.current);
+ /*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */
+ }
+
+ if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) {
+ nv40_fragtex_bind(nv40);
+
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (2);
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (1);
+ nv40->dirty &= ~NV40_NEW_FRAGPROG;
+ }
+
+ if (nv40->dirty & NV40_NEW_VERTPROG) {
+ nv40_vertprog_bind(nv40, nv40->vertprog.current);
+ nv40->dirty &= ~NV40_NEW_VERTPROG;
+ }
+
+ nv40->dirty_samplers = 0;
+
+ nv40_state_emit_dummy_relocs(nv40);
}