From f30810cb68a53c4fef360778a230126ed0ee0ee3 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 9 Sep 2010 19:12:54 +0200 Subject: nv50: use actual loads/stores if TEMPs are accessed indirectly --- src/gallium/drivers/nv50/nv50_screen.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_screen.c') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index c1efa443da..24a6d8055c 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -274,7 +274,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) uint64_t value; unsigned chipset = dev->chipset; unsigned tesla_class = 0; - unsigned stack_size; + unsigned stack_size, local_size, max_warps; int ret, i; const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; @@ -495,9 +495,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* shader stack */ nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); - stack_size = util_bitcount(value & 0xffff); - stack_size *= util_bitcount((value >> 24) & 0xf); - stack_size *= 32 * 64 * 8; + max_warps = util_bitcount(value & 0xffff); + max_warps *= util_bitcount((value >> 24) & 0xf) * 32; + + stack_size = max_warps * 64 * 8; ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, &screen->stack_bo); @@ -510,6 +511,22 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RING (chan, 4); + local_size = (NV50_CAP_MAX_PROGRAM_TEMPS * 16) * max_warps * 32; + + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, + local_size, &screen->local_bo); + if (ret) { + nv50_screen_destroy(pscreen); + return NULL; + } + + local_size = NV50_CAP_MAX_PROGRAM_TEMPS * 16; + + BEGIN_RING(chan, screen->tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 3); + OUT_RELOCh(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, util_unsigned_logbase2(local_size / 8)); + /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { BEGIN_RING(chan, screen->tesla, -- cgit v1.2.3