From 4c2247538394a313e1e90bfcd07c1ab9c7d41281 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 12 Nov 2010 15:17:40 +0100 Subject: nvc0: import nvc0 gallium driver --- src/gallium/drivers/nvc0/nvc0_resource.h | 109 +++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 src/gallium/drivers/nvc0/nvc0_resource.h (limited to 'src/gallium/drivers/nvc0/nvc0_resource.h') diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h new file mode 100644 index 0000000000..9c6895ea81 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -0,0 +1,109 @@ + +#ifndef __NVC0_RESOURCE_H__ +#define __NVC0_RESOURCE_H__ + +#include "util/u_transfer.h" +#include "util/u_double_list.h" +#define NOUVEAU_NVC0 +#include "nouveau/nouveau_winsys.h" +#undef NOUVEAU_NVC0 + +#include "nvc0_fence.h" + +struct pipe_resource; +struct nouveau_bo; + +/* Resources, if mapped into the GPU's address space, are guaranteed to + * have constant virtual addresses. + * The address of a resource will lie within the nouveau_bo referenced, + * and this bo should be added to the memory manager's validation list. + */ +struct nvc0_resource { + struct pipe_resource base; + const struct u_resource_vtbl *vtbl; + uint64_t address; + + uint8_t *data; + struct nouveau_bo *bo; + uint32_t offset; + + uint8_t status; + uint8_t domain; + struct nvc0_fence *fence; + struct list_head list; +}; + +#define NVC0_TILE_H(m) (8 << ((m >> 4) & 0xf)) +#define NVC0_TILE_D(m) (1 << (m >> 8)) + +struct nvc0_miptree_level { + int *image_offset; + uint32_t pitch; + uint32_t tile_mode; +}; + +#define NVC0_MAX_TEXTURE_LEVELS 16 + +struct nvc0_miptree { + struct nvc0_resource base; + struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS]; + int image_nr; + int total_size; +}; + +static INLINE struct nvc0_miptree * +nvc0_miptree(struct pipe_resource *pt) +{ + return (struct nvc0_miptree *)pt; +} + +static INLINE struct nvc0_resource * +nvc0_resource(struct pipe_resource *resource) +{ + return (struct nvc0_resource *)resource; +} + +/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? 
*/ +static INLINE boolean +nvc0_resource_mapped_by_gpu(struct pipe_resource *resource) +{ + return nvc0_resource(resource)->bo->offset != 0ULL; +} + +void +nvc0_init_resource_functions(struct pipe_context *pcontext); + +void +nvc0_screen_init_resource_functions(struct pipe_screen *pscreen); + +/* Internal functions: + */ +struct pipe_resource * +nvc0_miptree_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmp); + +struct pipe_resource * +nvc0_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *template, + struct winsys_handle *whandle); + +struct pipe_resource * +nvc0_buffer_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ); + +struct pipe_resource * +nvc0_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes, + unsigned usage); + + +struct pipe_surface * +nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags); + +void +nvc0_miptree_surface_del(struct pipe_surface *ps); + +#endif -- cgit v1.2.3 From 3ef1616b63507db01f54efa882a9cf28839cfdf3 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 9 Dec 2010 15:01:37 +0100 Subject: nvc0: buffer suballocation with a primitive slab allocator --- src/gallium/drivers/nvc0/Makefile | 3 +- src/gallium/drivers/nvc0/SConscript | 1 + src/gallium/drivers/nvc0/nvc0_buffer.c | 203 ++++++++++++++++++---- src/gallium/drivers/nvc0/nvc0_context.c | 2 + src/gallium/drivers/nvc0/nvc0_context.h | 2 + src/gallium/drivers/nvc0/nvc0_fence.c | 57 +++++-- src/gallium/drivers/nvc0/nvc0_fence.h | 29 ++-- src/gallium/drivers/nvc0/nvc0_graph_macros.h | 2 +- src/gallium/drivers/nvc0/nvc0_mm.c | 245 +++++++++++++++++++++++++++ src/gallium/drivers/nvc0/nvc0_push.c | 4 +- src/gallium/drivers/nvc0/nvc0_resource.h | 46 ++++- src/gallium/drivers/nvc0/nvc0_screen.c | 55 ++++-- src/gallium/drivers/nvc0/nvc0_screen.h | 61 +++++++ src/gallium/drivers/nvc0/nvc0_shader_state.c | 8 +- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 6 +- src/gallium/drivers/nvc0/nvc0_transfer.c | 119 +++++++++---- src/gallium/drivers/nvc0/nvc0_vbo.c | 97 ++++++++--- src/gallium/drivers/nvc0/nvc0_winsys.h | 16 ++ 18 files changed, 807 insertions(+), 149 deletions(-) create mode 100644 src/gallium/drivers/nvc0/nvc0_mm.c (limited to 'src/gallium/drivers/nvc0/nvc0_resource.h') diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile index 7aefd6f1f3..5c3d46d9ea 100644 --- a/src/gallium/drivers/nvc0/Makefile +++ b/src/gallium/drivers/nvc0/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \ nvc0_pc_regalloc.c \ nvc0_push.c \ nvc0_push2.c \ - nvc0_fence.c + nvc0_fence.c \ + nvc0_mm.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript index 808d6895f1..ec529d44f5 100644 --- a/src/gallium/drivers/nvc0/SConscript +++ b/src/gallium/drivers/nvc0/SConscript @@ -29,6 +29,7 @@ nvc0 = env.ConvenienceLibrary( 'nvc0_push.c', 'nvc0_push2.c', 'nvc0_fence.c', + 'nvc0_mm' ]) Export('nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index 873016f0d5..93d7f5d303 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -11,46 +11,116 @@ #include "nvc0_context.h" #include "nvc0_resource.h" +#define NVC0_BUFFER_STATUS_USER_MEMORY 0xff + +static INLINE boolean +nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf, + unsigned domain) +{ + if (domain == 
NOUVEAU_BO_VRAM) { + buf->mm = nvc0_mm_allocate(screen->mm_VRAM, buf->base.width0, &buf->bo, + &buf->offset); + if (!buf->bo) + return nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART); + } else + if (domain == NOUVEAU_BO_GART) { + buf->mm = nvc0_mm_allocate(screen->mm_GART, buf->base.width0, &buf->bo, + &buf->offset); + if (!buf->bo) + return FALSE; + } else { + assert(!domain); + if (!buf->data) + buf->data = MALLOC(buf->base.width0); + if (!buf->data) + return FALSE; + } + buf->domain = domain; + return TRUE; +} + +static INLINE void +release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence) +{ + (*mm)->next = fence->buffers; + fence->buffers = (*mm); + (*mm) = NULL; +} + static void nvc0_buffer_destroy(struct pipe_screen *pscreen, struct pipe_resource *presource) { + struct nvc0_screen *screen = nvc0_screen(pscreen); struct nvc0_resource *res = nvc0_resource(presource); - if (res->bo) - nouveau_screen_bo_release(pscreen, res->bo); + nouveau_bo_ref(NULL, &res->bo); - if (res->data) + if (res->mm) + release_allocation(&res->mm, screen->fence.current); + + if (res->status != NVC0_BUFFER_STATUS_USER_MEMORY && res->data) FREE(res->data); FREE(res); } +static INLINE uint32_t +nouveau_buffer_rw_flags(unsigned pipe) +{ + uint32_t flags = 0; + + if (pipe & PIPE_TRANSFER_READ) + flags = NOUVEAU_BO_RD; + if (pipe & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + + return flags; +} + static void * nvc0_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct nvc0_resource *res = nvc0_resource(transfer->resource); + struct nvc0_fence *fence; uint8_t *map; - uint32_t flags; + int ret; + uint32_t flags = nouveau_buffer_rw_flags(transfer->usage); - if (res->base.bind & PIPE_BIND_VERTEX_BUFFER) + if ((res->base.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) && + (flags & NOUVEAU_BO_WR)) nvc0_context(pipe)->vbo_dirty = TRUE; -// #ifdef NOUVEAU_USERPSACE_MM - if (res->base.bind & PIPE_BIND_CONSTANT_BUFFER) + if (res->domain == 0) return res->data + transfer->box.x; -// #endif - flags = nouveau_screen_transfer_flags(transfer->usage); - map = nouveau_screen_bo_map_range(pipe->screen, - res->bo, - transfer->box.x, transfer->box.width, - flags); - if (!map) + if (res->domain == NOUVEAU_BO_VRAM) { + NOUVEAU_ERR("transfers to/from VRAM buffers are not allowed\n"); + /* if this happens, migrate back to GART */ return NULL; + } + + if (res->score > -1024) + --res->score; + + ret = nouveau_bo_map(res->bo, flags | NOUVEAU_BO_NOSYNC); + if (ret) + return NULL; + map = res->bo->map; + nouveau_bo_unmap(res->bo); + + fence = (flags == NOUVEAU_BO_RD) ? 
res->fence_wr : res->fence; - return map + transfer->box.x; + if (fence) { + if (nvc0_fence_wait(fence) == FALSE) + NOUVEAU_ERR("failed to fence buffer\n"); + + nvc0_fence_reference(&res->fence, NULL); + nvc0_fence_reference(&res->fence_wr, NULL); + } + + return map + transfer->box.x + res->offset; } @@ -62,13 +132,12 @@ nvc0_buffer_transfer_flush_region(struct pipe_context *pipe, { struct nvc0_resource *res = nvc0_resource(transfer->resource); -#ifdef NOUVEAU_USERPSACE_MM if (!res->bo) return; -#endif + nouveau_screen_bo_map_flush_range(pipe->screen, res->bo, - transfer->box.x + box->x, + res->offset + transfer->box.x + box->x, box->width); } @@ -78,11 +147,10 @@ nvc0_buffer_transfer_unmap(struct pipe_context *pipe, { struct nvc0_resource *res = nvc0_resource(transfer->resource); -// #ifdef NOUVEAU_USERPSACE_MM if (res->data) return; -// #endif - nouveau_screen_bo_unmap(pipe->screen, res->bo); + + /* nouveau_screen_bo_unmap(pipe->screen, res->bo); */ } const struct u_resource_vtbl nvc0_buffer_vtbl = @@ -102,7 +170,9 @@ struct pipe_resource * nvc0_buffer_create(struct pipe_screen *pscreen, const struct pipe_resource *templ) { + struct nvc0_screen *screen = nvc0_screen(pscreen); struct nvc0_resource *buffer; + boolean ret; buffer = CALLOC_STRUCT(nvc0_resource); if (!buffer) @@ -114,14 +184,11 @@ nvc0_buffer_create(struct pipe_screen *pscreen, buffer->base.screen = pscreen; if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER) - buffer->data = MALLOC(buffer->base.width0); - - buffer->bo = nouveau_screen_bo_new(pscreen, - 16, - buffer->base.usage, - buffer->base.bind, - buffer->base.width0); - if (buffer->bo == NULL) + ret = nvc0_buffer_allocate(screen, buffer, 0); + else + ret = nvc0_buffer_allocate(screen, buffer, NOUVEAU_BO_GART); + + if (ret == FALSE) goto fail; return &buffer->base; @@ -154,13 +221,77 @@ nvc0_user_buffer_create(struct pipe_screen *pscreen, buffer->base.height0 = 1; buffer->base.depth0 = 1; - buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes); - if (!buffer->bo) - goto fail; - + buffer->data = ptr; + buffer->status = NVC0_BUFFER_STATUS_USER_MEMORY; + return &buffer->base; +} -fail: - FREE(buffer); - return NULL; +/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. 
*/ +boolean +nvc0_buffer_migrate(struct nvc0_context *nvc0, + struct nvc0_resource *buf, unsigned domain) +{ + struct nvc0_screen *screen = nvc0_screen(buf->base.screen); + struct nouveau_bo *bo; + unsigned size = buf->base.width0; + int ret; + + if (domain == NOUVEAU_BO_GART && buf->domain == 0) { + if (!nvc0_buffer_allocate(screen, buf, domain)) + return FALSE; + ret = nouveau_bo_map(buf->bo, NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + if (ret) + return ret; + memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size); + nouveau_bo_unmap(buf->bo); + } else + if (domain == NOUVEAU_BO_VRAM && buf->domain == NOUVEAU_BO_GART) { + struct nvc0_mm_allocation *mm = buf->mm; + + bo = buf->bo; + buf->bo = NULL; + buf->mm = NULL; + nvc0_buffer_allocate(screen, buf, domain); + + nvc0_m2mf_copy_linear(nvc0, buf->bo, 0, NOUVEAU_BO_VRAM, + bo, 0, NOUVEAU_BO_GART, buf->base.width0); + + release_allocation(&mm, screen->fence.current); + nouveau_bo_ref(NULL, &bo); + } else + if (domain == NOUVEAU_BO_VRAM && buf->domain == 0) { + /* should use a scratch buffer instead here */ + if (!nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART)) + return FALSE; + return nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_VRAM); + } else + return -1; + + buf->domain = domain; + + return TRUE; +} + +/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART. + * MUST NOT FLUSH THE PUSH BUFFER, we could be in the middle of a method. + */ +boolean +nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size) +{ + struct nvc0_screen *screen = nvc0_screen(buf->base.screen); + int ret; + + assert(buf->data && !buf->domain); + + if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART)) + return FALSE; + ret = nouveau_bo_map_range(buf->bo, base + buf->offset, size, + NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + if (ret) + return FALSE; + memcpy(buf->bo->map, buf->data + base, size); + nouveau_bo_unmap(buf->bo); + + return TRUE; } diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index f718feccd7..a05408a678 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -49,6 +49,8 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags, if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { FIRE_RING(chan); + + nvc0_screen_fence_next(nvc0->screen); } } diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index d0f78a564d..26263309d8 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -118,6 +118,7 @@ struct nvc0_context { unsigned sample_mask; boolean vbo_dirty; + boolean vbo_push_hint; struct draw_context *draw; }; @@ -150,6 +151,7 @@ static INLINE void nvc0_make_buffer_resident(struct nvc0_context *nvc0, struct nvc0_resource *res, unsigned flags) { + nvc0_resource_validate(res, flags); nvc0_make_bo_resident(nvc0, res->bo, flags); } diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index 3587097449..dc2abe45bd 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -30,14 +30,14 @@ boolean nvc0_screen_fence_new(struct nvc0_screen *screen, struct nvc0_fence **fence, - boolean emit) + boolean emit) { *fence = CALLOC_STRUCT(nvc0_fence); if (!*fence) return FALSE; (*fence)->screen = screen; - pipe_reference_init(&(*fence)->reference, 1); + (*fence)->ref = 1; if (emit) nvc0_fence_emit(*fence); @@ -53,15 +53,15 @@ nvc0_fence_emit(struct nvc0_fence *fence) 
fence->sequence = ++screen->fence.sequence; - assert(!(fence->state & NVC0_FENCE_STATE_EMITTED)); + assert(fence->state == NVC0_FENCE_STATE_AVAILABLE); BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); OUT_RING (chan, fence->sequence); - OUT_RING (chan, 0x1000f010); + OUT_RING (chan, NVC0_3D_QUERY_GET_FENCE); - pipe_reference(NULL, &fence->reference); + ++fence->ref; if (screen->fence.tail) screen->fence.tail->next = fence; @@ -94,6 +94,18 @@ nvc0_fence_del(struct nvc0_fence *fence) FREE(fence); } +static void +nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence) +{ + struct nvc0_mm_allocation *alloc = fence->buffers; + + while (alloc) { + struct nvc0_mm_allocation *next = alloc->next; + nvc0_mm_free(alloc); + alloc = next; + }; +} + static void nvc0_screen_fence_update(struct nvc0_screen *screen) { @@ -110,10 +122,12 @@ nvc0_screen_fence_update(struct nvc0_screen *screen) sequence = fence->sequence; fence->state = NVC0_FENCE_STATE_SIGNALLED; - if (fence->trigger.func) - fence->trigger.func(fence->trigger.arg); + + if (fence->buffers) + nvc0_fence_trigger_release_buffers(fence); nvc0_fence_reference(&fence, NULL); + if (sequence == screen->fence.sequence_ack) break; } @@ -122,24 +136,45 @@ nvc0_screen_fence_update(struct nvc0_screen *screen) screen->fence.tail = NULL; } +#define NVC0_FENCE_MAX_SPINS (1 << 17) + boolean nvc0_fence_wait(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; int spins = 0; - if (fence->state != NVC0_FENCE_STATE_EMITTED) - return TRUE; + if (fence->state == NVC0_FENCE_STATE_AVAILABLE) { + nvc0_fence_emit(fence); + + FIRE_RING(screen->base.channel); + + if (fence == screen->fence.current) + nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); + } + do { nvc0_screen_fence_update(screen); if (fence->state == NVC0_FENCE_STATE_SIGNALLED) return TRUE; + spins++; #ifdef PIPE_OS_UNIX - if ((spins & 7) == 7) /* spend a few cycles */ + if (!(spins % 8)) /* donate a few cycles */ sched_yield(); #endif - } while (++spins < 10000); + } while (spins < NVC0_FENCE_MAX_SPINS); + + if (spins > 9000) + NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence); return FALSE; } + +void +nvc0_screen_fence_next(struct nvc0_screen *screen) +{ + nvc0_fence_emit(screen->fence.current); + nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); + nvc0_screen_fence_update(screen); +} diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h index 513ac07c1a..7b31f28808 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.h +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -3,24 +3,21 @@ #define __NVC0_FENCE_H__ #include "util/u_inlines.h" +#include "util/u_double_list.h" -struct nvc0_fence_trigger { - void (*func)(void *); - void *arg; - struct nvc0_fence_trigger *next; -}; - +#define NVC0_FENCE_STATE_AVAILABLE 0 #define NVC0_FENCE_STATE_EMITTED 1 #define NVC0_FENCE_STATE_SIGNALLED 2 -/* reference first, so pipe_reference works directly */ +struct nvc0_mm_allocation; + struct nvc0_fence { - struct pipe_reference reference; struct nvc0_fence *next; struct nvc0_screen *screen; int state; + int ref; uint32_t sequence; - struct nvc0_fence_trigger trigger; + struct nvc0_mm_allocation *buffers; }; void nvc0_fence_emit(struct nvc0_fence *); @@ -31,10 +28,20 @@ boolean nvc0_fence_wait(struct nvc0_fence *); static INLINE void nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence) { - if 
(pipe_reference(&(*ref)->reference, &fence->reference)) - nvc0_fence_del(*ref); + if (*ref) { + if (--(*ref)->ref == 0) + nvc0_fence_del(*ref); + } + if (fence) + ++fence->ref; *ref = fence; } +static INLINE struct nvc0_fence * +nvc0_fence(struct pipe_fence_handle *fence) +{ + return (struct nvc0_fence *)fence; +} + #endif // __NVC0_FENCE_H__ diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h index b8b89de28e..7db09287ab 100644 --- a/src/gallium/drivers/nvc0/nvc0_graph_macros.h +++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h @@ -63,7 +63,7 @@ static const uint32_t nvc0_9097_vertex_array_select[] = static const uint32_t nvc0_9097_color_mask_brdc[] = { - 0x05a00021, /* maddr [0x1680] */ + 0x05a00021, /* maddr [0x1a00, increment = 4] */ 0x00000841, /* send $r1 */ 0x00000841, /* send $r1 */ 0x00000841, /* send $r1 */ diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c new file mode 100644 index 0000000000..e031fb393a --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_mm.c @@ -0,0 +1,245 @@ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +#include "nvc0_screen.h" + +#define MM_MIN_ORDER 7 +#define MM_MAX_ORDER 20 + +#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1) + +#define MM_MIN_SIZE (1 << MM_MIN_ORDER) +#define MM_MAX_SIZE (1 << MM_MAX_ORDER) + +struct mm_bucket { + struct list_head free; + struct list_head used; + struct list_head full; + int num_free; +}; + +struct nvc0_mman { + struct nouveau_device *dev; + struct mm_bucket bucket[MM_NUM_BUCKETS]; + uint32_t storage_type; + uint32_t domain; + uint64_t allocated; +}; + +struct mm_slab { + struct list_head head; + struct nouveau_bo *bo; + struct nvc0_mman *cache; + int order; + int count; + int free; + uint32_t bits[0]; +}; + +static int +mm_slab_alloc(struct mm_slab *slab) +{ + int i, n, b; + + if (slab->free == 0) + return -1; + + for (i = 0; i < (slab->count + 31) / 32; ++i) { + b = ffs(slab->bits[i]) - 1; + if (b >= 0) { + n = i * 32 + b; + assert(n < slab->count); + slab->free--; + slab->bits[i] &= ~(1 << b); + return n; + } + } + return -1; +} + +static INLINE void +mm_slab_free(struct mm_slab *slab, int i) +{ + assert(i < slab->count); + slab->bits[i / 32] |= 1 << (i % 32); + slab->free++; + assert(slab->free <= slab->count); +} + +static INLINE int +mm_get_order(uint32_t size) +{ + int s = __builtin_clz(size) ^ 31; + + if (size > (1 << s)) + s += 1; + return s; +} + +static struct mm_bucket * +mm_bucket_by_order(struct nvc0_mman *cache, int order) +{ + if (order > MM_MAX_ORDER) + return NULL; + return &cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER]; +} + +static struct mm_bucket * +mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) +{ + return mm_bucket_by_order(cache, mm_get_order(size)); +} + +/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */ +static INLINE uint32_t +mm_default_slab_size(unsigned chunk_order) +{ + assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); + + static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = + { + 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 + }; + + return 1 << slab_order[chunk_order - MM_MIN_ORDER]; +} + +static int +mm_slab_new(struct nvc0_mman *cache, int chunk_order) +{ + struct mm_slab *slab; + int words, ret; + const uint32_t size = mm_default_slab_size(chunk_order); + + words = ((size >> chunk_order) + 31) / 32; + assert(words); + + slab = MALLOC(sizeof(struct mm_slab) + 
words * 4); + if (!slab) + return PIPE_ERROR_OUT_OF_MEMORY; + + memset(&slab->bits[0], ~0, words * 4); + + slab->bo = NULL; + ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, + 0, cache->storage_type, &slab->bo); + if (ret) { + FREE(slab); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + LIST_INITHEAD(&slab->head); + + slab->cache = cache; + slab->order = chunk_order; + slab->count = slab->free = size >> chunk_order; + + LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free); + + cache->allocated += size; + + debug_printf("MM: new slab, total memory = %lu KiB\n", + cache->allocated / 1024); + + return PIPE_OK; +} + +/* @return token to identify slab or NULL if we just allocated a new bo */ +struct nvc0_mm_allocation * +nvc0_mm_allocate(struct nvc0_mman *cache, + uint32_t size, struct nouveau_bo **bo, uint32_t *offset) +{ + struct mm_bucket *bucket; + struct mm_slab *slab; + struct nvc0_mm_allocation *alloc; + int ret; + + bucket = mm_bucket_by_size(cache, size); + if (!bucket) { + ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, + 0, cache->storage_type, bo); + if (ret) + debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret); + + *offset = 0; + return NULL; + } + + if (!LIST_IS_EMPTY(&bucket->used)) { + slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head); + } else { + if (LIST_IS_EMPTY(&bucket->free)) { + mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER)); + } + slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head); + + LIST_DEL(&slab->head); + LIST_ADD(&slab->head, &bucket->used); + } + + *offset = mm_slab_alloc(slab) << slab->order; + + alloc = MALLOC_STRUCT(nvc0_mm_allocation); + if (!alloc) + return NULL; + + nouveau_bo_ref(slab->bo, bo); + + if (slab->free == 0) { + LIST_DEL(&slab->head); + LIST_ADD(&slab->head, &bucket->full); + } + + alloc->next = NULL; + alloc->offset = *offset; + alloc->priv = (void *)slab; + + return alloc; +} + +void +nvc0_mm_free(struct nvc0_mm_allocation *alloc) +{ + struct mm_slab *slab = (struct mm_slab *)alloc->priv; + struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order); + + mm_slab_free(slab, alloc->offset >> slab->order); + + if (slab->free == 1) { + LIST_DEL(&slab->head); + + if (slab->count > 1) + LIST_ADDTAIL(&slab->head, &bucket->used); + else + LIST_ADDTAIL(&slab->head, &bucket->free); + } + + FREE(alloc); +} + +struct nvc0_mman * +nvc0_mm_create(struct nouveau_device *dev, uint32_t domain, + uint32_t storage_type) +{ + struct nvc0_mman *cache = MALLOC_STRUCT(nvc0_mman); + int i; + + if (!cache) + return NULL; + + cache->dev = dev; + cache->domain = domain; + cache->storage_type = storage_type; + cache->allocated = 0; + + for (i = 0; i < MM_NUM_BUCKETS; ++i) { + LIST_INITHEAD(&cache->bucket[i].free); + LIST_INITHEAD(&cache->bucket[i].used); + LIST_INITHEAD(&cache->bucket[i].full); + } + + return cache; +} + diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 8b8fe610e2..1bdc8e88a7 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -215,9 +215,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; struct nvc0_resource *res = nvc0_resource(vb->buffer); - if (nouveau_bo_map(res->bo, NOUVEAU_BO_RD)) - return; - data = (uint8_t *)res->bo->map + vb->buffer_offset; + data = nvc0_resource_map_offset(res, vb->buffer_offset, NOUVEAU_BO_RD); if (info->indexed) data += info->index_bias * vb->stride; diff --git 
a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 9c6895ea81..b9f3f7b5d8 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -29,10 +29,43 @@ struct nvc0_resource { uint8_t status; uint8_t domain; + + int16_t score; /* low if mapped very often, if high can move to VRAM */ + struct nvc0_fence *fence; - struct list_head list; + struct nvc0_fence *fence_wr; + + struct nvc0_mm_allocation *mm; }; +/* XXX: wait for fence (atm only using this for vertex push) */ +static INLINE void * +nvc0_resource_map_offset(struct nvc0_resource *res, uint32_t offset, + uint32_t flags) +{ + void *map; + + if (res->domain == 0) + return res->data + offset; + + if (nouveau_bo_map_range(res->bo, res->offset + offset, + res->base.width0, flags | NOUVEAU_BO_NOSYNC)) + return NULL; + + /* With suballocation, the same bo can be mapped several times, so unmap + * immediately. Maps are guaranteed to persist. */ + map = res->bo->map; + nouveau_bo_unmap(res->bo); + return map; +} + +static INLINE void +nvc0_resource_unmap(struct nvc0_resource *res) +{ + if (res->domain != 0 && 0) + nouveau_bo_unmap(res->bo); +} + #define NVC0_TILE_H(m) (8 << ((m >> 4) & 0xf)) #define NVC0_TILE_D(m) (1 << (m >> 8)) @@ -67,7 +100,7 @@ nvc0_resource(struct pipe_resource *resource) static INLINE boolean nvc0_resource_mapped_by_gpu(struct pipe_resource *resource) { - return nvc0_resource(resource)->bo->offset != 0ULL; + return nvc0_resource(resource)->domain != 0; } void @@ -106,4 +139,13 @@ nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, void nvc0_miptree_surface_del(struct pipe_surface *ps); +struct nvc0_context; + +boolean +nvc0_buffer_migrate(struct nvc0_context *, + struct nvc0_resource *, unsigned domain); + +boolean +nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size); + #endif diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 616a990337..107d50d7f2 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -229,12 +229,28 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, return pos + size; } +static void +nvc0_screen_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + nvc0_fence_reference((struct nvc0_fence **)ptr, nvc0_fence(fence)); +} + +static int +nvc0_screen_fence_signalled(struct pipe_screen *pscreen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + return !(((struct nvc0_fence *)fence)->state == NVC0_FENCE_STATE_SIGNALLED); +} + static int nvc0_screen_fence_finish(struct pipe_screen *pscreen, - struct pipe_fence_handle *pfence, + struct pipe_fence_handle *fence, unsigned flags) { - return nvc0_fence_wait((struct nvc0_fence *)pfence) != TRUE; + return nvc0_fence_wait((struct nvc0_fence *)fence) != TRUE; } static void @@ -339,6 +355,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nvc0_screen_get_param; pscreen->get_shader_param = nvc0_screen_get_shader_param; pscreen->get_paramf = nvc0_screen_get_paramf; + pscreen->fence_reference = nvc0_screen_fence_reference; + pscreen->fence_signalled = nvc0_screen_fence_signalled; pscreen->fence_finish = nvc0_screen_fence_finish; nvc0_screen_init_resource_functions(pscreen); @@ -353,6 +371,18 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) screen->fence.map = 
screen->fence.bo->map; nouveau_bo_unmap(screen->fence.bo); + for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) { + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE, + &screen->scratch.bo[i]); + if (ret) + goto fail; + } + + for (i = 0; i < 8; ++i) { + BEGIN_RING(chan, (i << 13) | (0x0000 >> 2), 1); + OUT_RING (chan, 0x0000); + } + BEGIN_RING(chan, RING_MF_(0x0000), 1); OUT_RING (chan, 0x9039); BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3); @@ -510,15 +540,11 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1); OUT_RING (chan, 1); - // BEGIN_RING(chan, RING_3D(GP_SELECT), 1); - // OUT_RING (chan, 0x40); - BEGIN_RING(chan, RING_3D(SP_SELECT(4)), 1); + BEGIN_RING(chan, RING_3D(GP_SELECT), 1); OUT_RING (chan, 0x40); BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1); OUT_RING (chan, 0); - // BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); - // OUT_RING (chan, 0x30); - BEGIN_RING(chan, RING_3D(SP_SELECT(3)), 1); + BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); OUT_RING (chan, 0x30); BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1); OUT_RING (chan, 3); @@ -538,18 +564,19 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); OUT_RING (chan, 0xab); OUT_RING (chan, 0x00000000); - BEGIN_RING(chan, RING_3D_(0x07e8), 2); - OUT_RING (chan, 0xac); - OUT_RING (chan, 0x00000000); - BEGIN_RING(chan, RING_3D_(0x07f0), 2); - OUT_RING (chan, 0xac); - OUT_RING (chan, 0x00000000); FIRE_RING (chan); screen->tic.entries = CALLOC(4096, sizeof(void *)); screen->tsc.entries = screen->tic.entries + 2048; + screen->mm_GART = nvc0_mm_create(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, + 0x000); + screen->mm_VRAM = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0x000); + screen->mm_VRAM_fe0 = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); + + nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); + return pscreen; fail: diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 989e183bde..12cea658a5 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -10,9 +10,13 @@ #define NVC0_TIC_MAX_ENTRIES 2048 #define NVC0_TSC_MAX_ENTRIES 2048 +struct nvc0_mman; struct nvc0_context; struct nvc0_fence; +#define NVC0_SCRATCH_SIZE (2 << 20) +#define NVC0_SCRATCH_NR_BUFFERS 2 + struct nvc0_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; @@ -29,6 +33,13 @@ struct nvc0_screen { struct nouveau_resource *text_heap; + struct { + struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS]; + uint8_t *buf; + int index; + uint32_t offset; + } scratch; + struct { void **entries; int next; @@ -50,6 +61,10 @@ struct nvc0_screen { uint32_t sequence_ack; struct nouveau_bo *bo; } fence; + + struct nvc0_mman *mm_GART; + struct nvc0_mman *mm_VRAM; + struct nvc0_mman *mm_VRAM_fe0; }; static INLINE struct nvc0_screen * @@ -58,14 +73,60 @@ nvc0_screen(struct pipe_screen *screen) return (struct nvc0_screen *)screen; } +/* Since a resource can be migrated, we need to decouple allocations from + * them. This struct is linked with fences for delayed freeing of allocs. 
+ */ +struct nvc0_mm_allocation { + struct nvc0_mm_allocation *next; + void *priv; + uint32_t offset; +}; + +extern struct nvc0_mman * +nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type); + +extern struct nvc0_mm_allocation * +nvc0_mm_allocate(struct nvc0_mman *, + uint32_t size, struct nouveau_bo **, uint32_t *offset); +extern void +nvc0_mm_free(struct nvc0_mm_allocation *); + void nvc0_screen_make_buffers_resident(struct nvc0_screen *); int nvc0_screen_tic_alloc(struct nvc0_screen *, void *); int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); +static INLINE void +nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) +{ + struct nvc0_screen *screen = nvc0_screen(res->base.screen); + + assert(res->mm); + + nvc0_fence_reference(&res->fence, screen->fence.current); + + if (flags & NOUVEAU_BO_WR) + nvc0_fence_reference(&res->fence_wr, screen->fence.current); + + nouveau_reloc_emit(screen->base.channel, + NULL, 0, NULL, res->bo, 0, 0, NOUVEAU_BO_RDWR, 0, 0); +} + + boolean nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit); +void +nvc0_screen_fence_next(struct nvc0_screen *); + +static INLINE boolean +nvc0_screen_fence_emit(struct nvc0_screen *screen) +{ + nvc0_fence_emit(screen->fence.current); + + return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); +} + struct nvc0_format { uint32_t rt; uint32_t tic; diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 932848ea94..25a9ef722b 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -76,10 +76,10 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0) BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1); OUT_RING (chan, vp->max_gpr); - BEGIN_RING(chan, RING_3D_(0x163c), 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, RING_3D_(0x2600), 1); - OUT_RING (chan, 1); + // BEGIN_RING(chan, RING_3D_(0x163c), 1); + // OUT_RING (chan, 0); + // BEGIN_RING(chan, RING_3D_(0x2600), 1); + // OUT_RING (chan, 1); } void diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 1b161f66dc..a6797db9c5 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -22,6 +22,8 @@ #include +#define NOUVEAU_DEBUG 1 + #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -194,7 +196,7 @@ static INLINE void bld_warn_uninitialized(struct bld_context *bld, int kind, struct bld_register *reg, struct nv_basic_block *b) { -#ifdef NOUVEAU_DEBUG_BITS +#ifdef NOUVEAU_DEBUG long i = (reg - &bld->tvs[0][0]) / 4; long c = (reg - &bld->tvs[0][0]) & 3; @@ -1359,7 +1361,7 @@ bld_instruction(struct bld_context *bld, uint opcode = translate_opcode(insn->Instruction.Opcode); uint8_t mask = insn->Dst[0].Register.WriteMask; -#ifdef NOUVEAU_DEBUG_BITS +#ifdef NOUVEAU_DEBUG debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); #endif diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index adf9192943..56c5fe12c2 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -111,6 +111,8 @@ nvc0_m2mf_push_linear(struct nvc0_context *nvc0, uint32_t *src = (uint32_t *)data; unsigned count = (size + 3) / 4; + MARK_RING (chan, 8, 2); + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR); OUT_RELOCl(chan, dst, offset, domain | 
NOUVEAU_BO_WR); @@ -125,6 +127,7 @@ nvc0_m2mf_push_linear(struct nvc0_context *nvc0, if (nr < 9) { FIRE_RING(chan); + nvc0_make_bo_resident(nvc0, dst, NOUVEAU_BO_WR); continue; } nr = MIN2(count, nr - 1); @@ -138,53 +141,90 @@ nvc0_m2mf_push_linear(struct nvc0_context *nvc0, } } +void +nvc0_m2mf_copy_linear(struct nvc0_context *nvc0, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + + while (size) { + unsigned bytes = MIN2(size, 1 << 17); + + MARK_RING (chan, 11, 4); + + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); + OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); + BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); + OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); + OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); + OUT_RING (chan, bytes); + OUT_RING (chan, 1); + BEGIN_RING(chan, RING_MF(EXEC), 1); + OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | + NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT); + + srcoff += bytes; + dstoff += bytes; + size -= bytes; + } +} + static void -nvc0_sifc_push_rect(struct pipe_screen *pscreen, - const struct nvc0_m2mf_rect *dst, unsigned dst_format, - unsigned src_format, unsigned src_pitch, void *src, +nvc0_m2mf_push_rect(struct pipe_screen *pscreen, + const struct nvc0_m2mf_rect *dst, + const void *data, unsigned nblocksx, unsigned nblocksy) { struct nouveau_channel *chan; + const uint8_t *src = (const uint8_t *)data; + const int cpp = dst->cpp; + const int line_len = nblocksx * cpp; + int dy = dst->y; - if (dst->bo->tile_flags) { - BEGIN_RING(chan, RING_2D(DST_FORMAT), 5); - OUT_RING (chan, dst_format); - OUT_RING (chan, 0); - OUT_RING (chan, dst->tile_mode); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - } else { - BEGIN_RING(chan, RING_2D(DST_FORMAT), 2); - OUT_RING (chan, NV50_SURFACE_FORMAT_A8R8G8B8_UNORM); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_2D(DST_PITCH), 1); - OUT_RING (chan, dst->pitch); - } + assert(dst->bo->tile_flags); - BEGIN_RING(chan, RING_2D(DST_WIDTH), 4); - OUT_RING (chan, dst->width); + BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5); + OUT_RING (chan, dst->tile_mode); + OUT_RING (chan, dst->width * cpp); OUT_RING (chan, dst->height); - OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); - OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); - - BEGIN_RING(chan, RING_2D(SIFC_BITMAP_ENABLE), 2); - OUT_RING (chan, 0); - OUT_RING (chan, src_format); - BEGIN_RING(chan, RING_2D(SIFC_WIDTH), 10); - OUT_RING (chan, nblocksx); - OUT_RING (chan, nblocksy); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, dst->x); - OUT_RING (chan, 0); - OUT_RING (chan, dst->y); + OUT_RING (chan, dst->depth); + OUT_RING (chan, dst->z); while (nblocksy) { + int line_count, words; + int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN); - src = (uint8_t *)src + src_pitch; + if (size < (12 + words)) { + FIRE_RING(chan); + continue; + } + line_count = (size * 4) / line_len; + words = (line_count * line_len + 3) / 4; + + BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); + OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); + + BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2); + OUT_RING 
(chan, dst->x * cpp); + OUT_RING (chan, dy); + BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); + OUT_RING (chan, line_len); + OUT_RING (chan, line_count); + BEGIN_RING(chan, RING_MF(EXEC), 1); + OUT_RING (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | + NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN); + + BEGIN_RING(chan, RING_MF(DATA), words); + OUT_RINGp (chan, src, words); + + dy += line_count; + src += line_len * line_count; + nblocksy -= line_count; } } @@ -242,6 +282,11 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[0].pitch = lvl->pitch; tx->rect[0].domain = NOUVEAU_BO_VRAM; + if (!(usage & PIPE_TRANSFER_READ) && + (res->depth0 == 1) && (tx->nblocksy * tx->base.stride < 512 * 4)) { + /* don't allocate scratch buffer, upload through FIFO */ + } + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, tx->nblocksy * tx->base.stride, &tx->rect[1].bo); if (ret) { diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index b8529e632d..84951ed945 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -131,8 +131,16 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) ve = &vertex->element[i]; vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; - if (!nvc0_resource_mapped_by_gpu(vb->buffer)) - nvc0->vbo_fifo |= 1 << i; + if (!nvc0_resource_mapped_by_gpu(vb->buffer)) { + if (nvc0->vbo_push_hint) { + nvc0->vbo_fifo |= 1 << i; + } else { + nvc0_migrate_vertices(nvc0_resource(vb->buffer), + vb->buffer_offset, + vb->buffer->width0 - vb->buffer_offset); + nvc0->vbo_dirty = TRUE; + } + } if (1 || likely(vb->stride)) { OUT_RING(chan, ve->state); @@ -142,7 +150,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) } for (i = 0; i < vertex->num_elements; ++i) { - struct nouveau_bo *bo; + struct nvc0_resource *res; unsigned size, offset; ve = &vertex->element[i]; @@ -158,7 +166,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) continue; } - bo = nvc0_resource(vb->buffer)->bo; + res = nvc0_resource(vb->buffer); size = vb->buffer->width0; offset = ve->pipe.src_offset + vb->buffer_offset; @@ -173,17 +181,16 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) INLIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0); } - nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, - nvc0_resource(vb->buffer), NOUVEAU_BO_RD); + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, res, NOUVEAU_BO_RD); BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); OUT_RING (chan, (1 << 12) | vb->stride); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); OUT_RING (chan, i); - OUT_RELOCh(chan, bo, size, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(chan, bo, size, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RESRCh(chan, res, size, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, size, NOUVEAU_BO_RD); + OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); } for (; i < nvc0->state.num_vtxelts; ++i) { BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1); @@ -231,8 +238,6 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) struct nvc0_context *nvc0 = chan->user_private; nvc0_bufctx_emit_relocs(nvc0); - - debug_printf("%s(%p)\n", __FUNCTION__, nvc0); } #if 0 @@ -325,7 +330,7 @@ nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map, count &= ~3; } while (count) { - unsigned i, nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN & ~3) * 4) / 4; 
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4; BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr); for (i = 0; i < nr; ++i) { @@ -333,7 +338,7 @@ nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map, (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]); map += 4; } - count -= nr; + count -= nr * 4; } } @@ -349,14 +354,14 @@ nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map, OUT_RING (chan, *map++); } while (count) { - unsigned i, nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN & ~1) * 2) / 2; + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); for (i = 0; i < nr; ++i) { OUT_RING(chan, (map[1] << 16) | map[0]); map += 2; } - count -= nr; + count -= nr * 2; } } @@ -367,18 +372,41 @@ nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map, map += start; while (count) { - unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); + const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr); - for (i = 0; i < nr; ++i) - OUT_RING(chan, *map++); + OUT_RINGp (chan, map, nr); + map += nr; count -= nr; } } static void -nvc0_draw_elements(struct nvc0_context *nvc0, +nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + count--; + BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); + OUT_RING (chan, *map++); + } + while (count) { + unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + + BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); + for (i = 0; i < nr; ++i) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; + } + count -= nr * 2; + } +} + +static void +nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, unsigned mode, unsigned start, unsigned count, unsigned instance_count, int32_t index_bias) { @@ -400,7 +428,7 @@ nvc0_draw_elements(struct nvc0_context *nvc0, } if (nvc0_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) { - struct nouveau_bo *bo = nvc0_resource(nvc0->idxbuf.buffer)->bo; + struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer); unsigned offset = nvc0->idxbuf.offset; unsigned limit = nvc0->idxbuf.buffer->width0 - 1; @@ -415,10 +443,10 @@ nvc0_draw_elements(struct nvc0_context *nvc0, BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); OUT_RING (chan, mode); BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7); - OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); - OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); - OUT_RELOCh(chan, bo, limit, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); - OUT_RELOCl(chan, bo, limit, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); + OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); + OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD); + OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD); OUT_RING (chan, index_size); OUT_RING (chan, start); OUT_RING (chan, count); @@ -443,7 +471,10 @@ nvc0_draw_elements(struct nvc0_context *nvc0, nvc0_draw_elements_inline_u16(chan, data, start, count); break; case 4: - nvc0_draw_elements_inline_u32(chan, data, start, count); + if (shorten) + nvc0_draw_elements_inline_u32_short(chan, data, start, count); + else + nvc0_draw_elements_inline_u32(chan, data, start, count); break; default: assert(0); @@ -464,6 +495,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_channel *chan = 
nvc0->screen->base.channel; + /* For picking only a few vertices from a large user buffer, push is better, + * if index count is larger and we expect repeated vertices, suggest upload. + */ + nvc0->vbo_push_hint = /* the 64 is heuristic */ + !(info->indexed && + ((info->max_index - info->min_index + 64) < info->count)); + nvc0_state_validate(nvc0); if (nvc0->state.instance_base != info->start_instance) { @@ -488,6 +526,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, info->instance_count); } else { + boolean shorten = info->max_index <= 65535; + assert(nvc0->idxbuf.buffer); if (info->primitive_restart != nvc0->state.prim_restart) { @@ -495,6 +535,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2); OUT_RING (chan, 1); OUT_RING (chan, info->restart_index); + + if (info->restart_index > 65535) + shorten = FALSE; } else { INLIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0); } @@ -505,7 +548,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) OUT_RING (chan, info->restart_index); } - nvc0_draw_elements(nvc0, + nvc0_draw_elements(nvc0, shorten, info->mode, info->start, info->count, info->instance_count, info->index_bias); } diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h index 48ea87613b..e97ca8e90d 100644 --- a/src/gallium/drivers/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -12,6 +12,8 @@ #include "nouveau/nouveau_resource.h" #include "nouveau/nouveau_reloc.h" +#include "nvc0_resource.h" /* OUT_RESRC */ + #ifndef NV04_PFIFO_MAX_PACKET_LEN #define NV04_PFIFO_MAX_PACKET_LEN 2047 #endif @@ -143,6 +145,20 @@ OUT_RELOCh(struct nouveau_channel *chan, struct nouveau_bo *bo, return OUT_RELOC(chan, bo, delta, flags | NOUVEAU_BO_HIGH, 0, 0); } +static INLINE int +OUT_RESRCh(struct nouveau_channel *chan, struct nvc0_resource *res, + unsigned delta, unsigned flags) +{ + return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE int +OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, + unsigned delta, unsigned flags) +{ + return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); +} + static INLINE void FIRE_RING(struct nouveau_channel *chan) { -- cgit v1.2.3 From ca5deb0c355cc4a120b754a228ff5f51007fbcea Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 19 Dec 2010 21:48:39 +0100 Subject: nvc0: adapt to array textures interface change --- src/gallium/drivers/nvc0/nvc0_context.h | 6 +- src/gallium/drivers/nvc0/nvc0_miptree.c | 137 +++++++++++----------- src/gallium/drivers/nvc0/nvc0_resource.c | 6 +- src/gallium/drivers/nvc0/nvc0_resource.h | 23 ++-- src/gallium/drivers/nvc0/nvc0_state_validate.c | 33 +++--- src/gallium/drivers/nvc0/nvc0_surface.c | 156 ++++++++++++++----------- src/gallium/drivers/nvc0/nvc0_tex.c | 21 +++- src/gallium/drivers/nvc0/nvc0_transfer.c | 68 ++++++----- src/gallium/drivers/nvc0/nvc0_transfer.h | 2 +- 9 files changed, 251 insertions(+), 201 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_resource.h') diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 0ea18d74ee..df8a6da075 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -130,7 +130,11 @@ nvc0_context(struct pipe_context *pipe) } struct nvc0_surface { - struct pipe_surface pipe; + struct pipe_surface 
base; + uint32_t offset; + uint32_t width; + uint16_t height; + uint16_t depth; }; static INLINE struct nvc0_surface * diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index cca307b37f..70b10a0fc3 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -60,12 +60,12 @@ get_tile_dims(unsigned nx, unsigned ny, unsigned nz) static INLINE unsigned get_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh) { - unsigned tile_h = NVC0_TILE_H(tile_mode); - unsigned tile_d = NVC0_TILE_D(tile_mode); + unsigned tile_h = NVC0_TILE_HEIGHT(tile_mode); + unsigned tile_d = NVC0_TILE_DEPTH(tile_mode); /* pitch_2d == to next slice within this volume tile */ /* pitch_3d == size (in bytes) of a volume tile */ - unsigned pitch_2d = tile_h * 64; + unsigned pitch_2d = tile_h * NVC0_TILE_PITCH(tile_mode); unsigned pitch_3d = tile_d * align(nbh, tile_h) * pitch; return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d; @@ -75,10 +75,6 @@ static void nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) { struct nvc0_miptree *mt = nvc0_miptree(pt); - unsigned l; - - for (l = 0; l <= pt->last_level; ++l) - FREE(mt->level[l].image_offset); nouveau_screen_bo_release(pscreen, mt->base.bo); @@ -125,8 +121,8 @@ nvc0_miptree_create(struct pipe_screen *pscreen, struct nouveau_device *dev = nouveau_screen(pscreen)->device; struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree); struct pipe_resource *pt = &mt->base.base; - int ret, i; - unsigned w, h, d, l, image_alignment, alloc_size; + int ret; + unsigned w, h, d, l, alloc_size; uint32_t tile_flags; if (!mt) @@ -137,9 +133,11 @@ nvc0_miptree_create(struct pipe_screen *pscreen, pipe_reference_init(&pt->reference, 1); pt->screen = pscreen; + mt->layout_3d = pt->target == PIPE_TEXTURE_3D; + w = pt->width0; h = pt->height0; - d = pt->depth0; + d = mt->layout_3d ? pt->depth0 : 1; switch (pt->format) { case PIPE_FORMAT_Z16_UNORM: @@ -180,47 +178,32 @@ nvc0_miptree_create(struct pipe_screen *pscreen, break; } - /* XXX: texture arrays */ - mt->image_nr = (pt->target == PIPE_TEXTURE_CUBE) ? 6 : 1; - - for (l = 0; l <= pt->last_level; l++) { + /* For 3D textures, a mipmap is spanned by all the layers, for array + * textures and cube maps, each layer contains its own mipmaps. + */ + for (l = 0; l <= pt->last_level; ++l) { struct nvc0_miptree_level *lvl = &mt->level[l]; + unsigned nbx = util_format_get_nblocksx(pt->format, w); unsigned nby = util_format_get_nblocksy(pt->format, h); + unsigned blocksize = util_format_get_blocksize(pt->format); + + lvl->offset = mt->total_size; + lvl->tile_mode = get_tile_dims(nbx, nby, d); + lvl->pitch = align(nbx * blocksize, NVC0_TILE_PITCH(lvl->tile_mode)); - lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); - lvl->pitch = align(util_format_get_stride(pt->format, w), 64); - lvl->tile_mode = get_tile_dims(w, nby, d); + mt->total_size += lvl->pitch * + align(nby, NVC0_TILE_HEIGHT(lvl->tile_mode)) * + align(d, NVC0_TILE_DEPTH(lvl->tile_mode)); w = u_minify(w, 1); h = u_minify(h, 1); d = u_minify(d, 1); } - image_alignment = NVC0_TILE_H(mt->level[0].tile_mode) * 64; - image_alignment *= NVC0_TILE_D(mt->level[0].tile_mode); - - /* NOTE the distinction between arrays of mip-mapped 2D textures and - * mip-mapped 3D textures. We can't use image_nr == depth for 3D mip. 
- */ - for (i = 0; i < mt->image_nr; i++) { - for (l = 0; l <= pt->last_level; l++) { - struct nvc0_miptree_level *lvl = &mt->level[l]; - int size; - unsigned tile_h = NVC0_TILE_H(lvl->tile_mode); - unsigned tile_d = NVC0_TILE_D(lvl->tile_mode); - - h = u_minify(pt->height0, l); - d = u_minify(pt->depth0, l); - - size = lvl->pitch; - size *= align(util_format_get_nblocksy(pt->format, h), tile_h); - size *= align(d, tile_d); - - lvl->image_offset[i] = mt->total_size; - - mt->total_size += size; - } - mt->total_size = align(mt->total_size, image_alignment); + if (pt->array_size > 1) { + mt->layer_stride = align(mt->total_size, + NVC0_TILE_SIZE(mt->level[0].tile_mode)); + mt->total_size = mt->layer_stride * pt->array_size; } alloc_size = mt->total_size; @@ -231,11 +214,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen, mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { - for (l = 0; l <= pt->last_level; ++l) - FREE(mt->level[l].image_offset); FREE(mt); return NULL; } + mt->base.domain = NOUVEAU_BO_VRAM; return pt; } @@ -248,11 +230,12 @@ nvc0_miptree_from_handle(struct pipe_screen *pscreen, struct nvc0_miptree *mt; unsigned stride; - /* only supports 2D, non-mip mapped textures for the moment */ + /* only supports 2D, non-mipmapped textures for the moment */ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || templ->last_level != 0 || - templ->depth0 != 1) + templ->depth0 != 1 || + templ->array_size > 1) return NULL; mt = CALLOC_STRUCT(nvc0_miptree); @@ -269,9 +252,8 @@ nvc0_miptree_from_handle(struct pipe_screen *pscreen, mt->base.vtbl = &nvc0_miptree_vtbl; pipe_reference_init(&mt->base.base.reference, 1); mt->base.base.screen = pscreen; - mt->image_nr = 1; mt->level[0].pitch = stride; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + mt->level[0].offset = 0; mt->level[0].tile_mode = mt->base.bo->tile_mode; /* no need to adjust bo reference count */ @@ -283,41 +265,52 @@ nvc0_miptree_from_handle(struct pipe_screen *pscreen, */ struct pipe_surface * -nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) +nvc0_miptree_surface_new(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *templ) { - struct nvc0_miptree *mt = nvc0_miptree(pt); - struct nvc0_miptree_level *lvl = &mt->level[level]; + struct nvc0_miptree *mt = nvc0_miptree(pt); /* guaranteed */ + struct nvc0_surface *ns; struct pipe_surface *ps; - unsigned img = 0; - - if (pt->target == PIPE_TEXTURE_CUBE) - img = face; + struct nvc0_miptree_level *lvl = &mt->level[templ->u.tex.level]; - ps = CALLOC_STRUCT(pipe_surface); - if (!ps) + ns = CALLOC_STRUCT(nvc0_surface); + if (!ns) return NULL; + ps = &ns->base; + + pipe_reference_init(&ps->reference, 1); pipe_resource_reference(&ps->texture, pt); + ps->context = pipe; ps->format = pt->format; - ps->width = u_minify(pt->width0, level); - ps->height = u_minify(pt->height0, level); - ps->usage = flags; - pipe_reference_init(&ps->reference, 1); - ps->face = face; - ps->level = level; - ps->zslice = zslice; - ps->offset = lvl->image_offset[img]; - - if (pt->target == PIPE_TEXTURE_3D) - ps->offset += get_zslice_offset(lvl->tile_mode, zslice, lvl->pitch, + ps->usage = templ->usage; + ps->u.tex.level = templ->u.tex.level; + ps->u.tex.first_layer = templ->u.tex.first_layer; + ps->u.tex.last_layer = templ->u.tex.last_layer; + + ns->width = u_minify(pt->width0, ps->u.tex.level); + ns->height = u_minify(pt->height0, 
ps->u.tex.level); + ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1; + ns->offset = lvl->offset; + + /* comment says there are going to be removed, but they're used by the st */ + ps->width = ns->width; + ps->height = ns->height; + + if (mt->layout_3d) { + ns->offset += get_zslice_offset(lvl->tile_mode, ps->u.tex.first_layer, + lvl->pitch, util_format_get_nblocksy(pt->format, - ps->height)); + ns->height)); + } else { + ns->offset += mt->layer_stride * ps->u.tex.first_layer; + } + return ps; } void -nvc0_miptree_surface_del(struct pipe_surface *ps) +nvc0_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) { struct nvc0_surface *s = nvc0_surface(ps); diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c index 181d917f22..7e42cedd16 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.c +++ b/src/gallium/drivers/nvc0/nvc0_resource.c @@ -6,7 +6,7 @@ static unsigned nvc0_resource_is_referenced(struct pipe_context *pipe, struct pipe_resource *resource, - unsigned face, unsigned level) + unsigned face, int layer) { struct nvc0_resource *res = nvc0_resource(resource); unsigned flags = 0; @@ -56,6 +56,8 @@ nvc0_init_resource_functions(struct pipe_context *pcontext) pcontext->transfer_destroy = u_transfer_destroy_vtbl; pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; pcontext->is_resource_referenced = nvc0_resource_is_referenced; + pcontext->create_surface = nvc0_miptree_surface_new; + pcontext->surface_destroy = nvc0_miptree_surface_del; } void @@ -66,6 +68,4 @@ nvc0_screen_init_resource_functions(struct pipe_screen *pscreen) pscreen->resource_get_handle = u_resource_get_handle_vtbl; pscreen->resource_destroy = u_resource_destroy_vtbl; pscreen->user_buffer_create = nvc0_user_buffer_create; - pscreen->get_tex_surface = nvc0_miptree_surface_new; - pscreen->tex_surface_destroy = nvc0_miptree_surface_del; } diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index b9f3f7b5d8..0ff7a03198 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -66,11 +66,15 @@ nvc0_resource_unmap(struct nvc0_resource *res) nouveau_bo_unmap(res->bo); } -#define NVC0_TILE_H(m) (8 << ((m >> 4) & 0xf)) -#define NVC0_TILE_D(m) (1 << (m >> 8)) +#define NVC0_TILE_PITCH(m) (64 << ((m) & 0xf)) +#define NVC0_TILE_HEIGHT(m) (8 << (((m) >> 4) & 0xf)) +#define NVC0_TILE_DEPTH(m) (1 << ((m) >> 8)) + +#define NVC0_TILE_SIZE(m) \ + (NVC0_TILE_PITCH(m) * NVC0_TILE_HEIGHT(m) * NVC0_TILE_DEPTH(m)) struct nvc0_miptree_level { - int *image_offset; + uint32_t offset; uint32_t pitch; uint32_t tile_mode; }; @@ -80,8 +84,9 @@ struct nvc0_miptree_level { struct nvc0_miptree { struct nvc0_resource base; struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS]; - int image_nr; - int total_size; + uint32_t total_size; + uint32_t layer_stride; + boolean layout_3d; /* TRUE if layer count varies with mip level */ }; static INLINE struct nvc0_miptree * @@ -132,12 +137,12 @@ nvc0_user_buffer_create(struct pipe_screen *screen, struct pipe_surface * -nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags); +nvc0_miptree_surface_new(struct pipe_context *, + struct pipe_resource *, + const struct pipe_surface *templ); void -nvc0_miptree_surface_del(struct pipe_surface *ps); +nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *); struct nvc0_context; diff --git 
a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index a1419bb310..a395b18a99 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -7,12 +7,15 @@ nvc0_validate_zcull(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; - struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture); + struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); + struct nvc0_miptree *mt = nvc0_miptree(sf->base.texture); struct nouveau_bo *bo = mt->base.bo; uint32_t size; uint32_t offset = align(mt->total_size, 1 << 17); unsigned width, height; + assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2); + size = mt->total_size * 2; height = align(fb->height, 32); @@ -65,18 +68,20 @@ nvc0_validate_fb(struct nvc0_context *nvc0) for (i = 0; i < fb->nr_cbufs; ++i) { struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture); + struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]); struct nouveau_bo *bo = mt->base.bo; - unsigned offset = fb->cbufs[i]->offset; + uint32_t offset = sf->offset; BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); - OUT_RING (chan, fb->cbufs[i]->width); - OUT_RING (chan, fb->cbufs[i]->height); - OUT_RING (chan, nvc0_format_table[fb->cbufs[i]->format].rt); - OUT_RING (chan, mt->level[fb->cbufs[i]->level].tile_mode); - OUT_RING (chan, 1); - OUT_RING (chan, 0); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, nvc0_format_table[sf->base.format].rt); + OUT_RING (chan, (mt->layout_3d << 16) | + mt->level[sf->base.u.tex.level].tile_mode); + OUT_RING (chan, sf->depth); + OUT_RING (chan, mt->layer_stride); nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ -84,21 +89,23 @@ nvc0_validate_fb(struct nvc0_context *nvc0) if (fb->zsbuf) { struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture); + struct nvc0_surface *sf = nvc0_surface(fb->zsbuf); struct nouveau_bo *bo = mt->base.bo; - unsigned offset = fb->zsbuf->offset; + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + uint32_t offset = sf->offset; BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RING (chan, nvc0_format_table[fb->zsbuf->format].rt); - OUT_RING (chan, mt->level[fb->zsbuf->level].tile_mode); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); - OUT_RING (chan, fb->zsbuf->width); - OUT_RING (chan, fb->zsbuf->height); - OUT_RING (chan, (1 << 16) | 1); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); + OUT_RING (chan, (unk << 16) | sf->depth); nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index b52b09877c..cc0a65687d 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -78,75 +78,90 @@ nvc0_2d_format(enum pipe_format format) } static int -nvc0_surface_set(struct nvc0_screen *screen, struct pipe_surface *ps, int dst) +nvc0_2d_texture_set(struct nouveau_channel 
*chan, int dst, + struct nvc0_miptree *mt, unsigned level, unsigned layer) { - struct nvc0_miptree *mt = nvc0_miptree(ps->texture); - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_bo *bo = nvc0_miptree(ps->texture)->base.bo; - int format, mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT; - int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); - - format = nvc0_2d_format(ps->format); + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; + uint32_t format; + uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT; + uint32_t flags = mt->base.domain | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); + uint32_t offset = mt->level[level].offset; + + format = nvc0_2d_format(mt->base.base.format); if (!format) { NOUVEAU_ERR("invalid/unsupported surface format: %s\n", - util_format_name(ps->format)); + util_format_name(mt->base.base.format)); return 1; } - if (!bo->tile_flags) { + width = u_minify(mt->base.base.width0, level); + height = u_minify(mt->base.base.height0, level); + + offset = mt->level[level].offset; + if (!mt->layout_3d) { + offset += mt->layer_stride * layer; + depth = 1; + layer = 0; + } else { + depth = u_minify(mt->base.base.depth0, level); + } + + if (!(bo->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK)) { BEGIN_RING(chan, RING_2D_(mthd), 2); OUT_RING (chan, format); OUT_RING (chan, 1); BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5); - OUT_RING (chan, mt->level[ps->level].pitch); - OUT_RING (chan, ps->width); - OUT_RING (chan, ps->height); - OUT_RELOCh(chan, bo, ps->offset, flags); - OUT_RELOCl(chan, bo, ps->offset, flags); + OUT_RING (chan, mt->level[level].pitch); + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RELOCh(chan, bo, offset, flags); + OUT_RELOCl(chan, bo, offset, flags); } else { BEGIN_RING(chan, RING_2D_(mthd), 5); OUT_RING (chan, format); OUT_RING (chan, 0); - OUT_RING (chan, mt->level[ps->level].tile_mode); - OUT_RING (chan, 1); - OUT_RING (chan, 0); + OUT_RING (chan, mt->level[level].tile_mode); + OUT_RING (chan, depth); + OUT_RING (chan, layer); BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4); - OUT_RING (chan, ps->width); - OUT_RING (chan, ps->height); - OUT_RELOCh(chan, bo, ps->offset, flags); - OUT_RELOCl(chan, bo, ps->offset, flags); + OUT_RING (chan, width); + OUT_RING (chan, height); + OUT_RELOCh(chan, bo, offset, flags); + OUT_RELOCl(chan, bo, offset, flags); } - + #if 0 if (dst) { BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4); OUT_RING (chan, 0); OUT_RING (chan, 0); - OUT_RING (chan, surf->width); - OUT_RING (chan, surf->height); + OUT_RING (chan, width); + OUT_RING (chan, height); } #endif return 0; } static int -nvc0_surface_do_copy(struct nvc0_screen *screen, - struct pipe_surface *dst, int dx, int dy, - struct pipe_surface *src, int sx, int sy, - int w, int h) +nvc0_2d_texture_do_copy(struct nouveau_channel *chan, + struct nvc0_miptree *dst, unsigned dst_level, + unsigned dx, unsigned dy, unsigned dz, + struct nvc0_miptree *src, unsigned src_level, + unsigned sx, unsigned sy, unsigned sz, + unsigned w, unsigned h) { - struct nouveau_channel *chan = screen->base.channel; int ret; - ret = MARK_RING(chan, 2*16 + 32, 4); + ret = MARK_RING(chan, 2 * 16 + 32, 4); if (ret) return ret; - ret = nvc0_surface_set(screen, dst, 1); + ret = nvc0_2d_texture_set(chan, 1, dst, dst_level, dz); if (ret) return ret; - ret = nvc0_surface_set(screen, src, 0); + ret = nvc0_2d_texture_set(chan, 0, src, src_level, sz); if (ret) return ret; @@ -173,44 +188,44 @@ nvc0_surface_do_copy(struct nvc0_screen *screen, } static 
void -nvc0_surface_copy(struct pipe_context *pipe, - struct pipe_resource *dest, struct pipe_subresource subdst, - unsigned destx, unsigned desty, unsigned destz, - struct pipe_resource *src, struct pipe_subresource subsrc, - unsigned srcx, unsigned srcy, unsigned srcz, - unsigned width, unsigned height) +nvc0_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) { - struct nvc0_context *nv50 = nvc0_context(pipe); - struct nvc0_screen *screen = nv50->screen; - struct pipe_surface *ps_dst, *ps_src; + struct nvc0_screen *screen = nvc0_context(pipe)->screen; + int ret; + unsigned dst_layer = dstz, src_layer = src_box->z; - assert((src->format == dest->format) || + assert((src->format == dst->format) || (nvc0_2d_format_faithful(src->format) && - nvc0_2d_format_faithful(dest->format))); - - ps_src = nvc0_miptree_surface_new(pipe->screen, src, subsrc.face, - subsrc.level, srcz, 0 /* bind flags */); - ps_dst = nvc0_miptree_surface_new(pipe->screen, dest, subdst.face, - subdst.level, destz, 0 /* bind flags */); - - nvc0_surface_do_copy(screen, ps_dst, destx, desty, ps_src, srcx, - srcy, width, height); - - nvc0_miptree_surface_del(ps_src); - nvc0_miptree_surface_del(ps_dst); + nvc0_2d_format_faithful(dst->format))); + + for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) { + ret = nvc0_2d_texture_do_copy(screen->base.channel, + nvc0_miptree(dst), dst_level, + dstx, dsty, dst_layer, + nvc0_miptree(src), src_level, + src_box->x, src_box->y, src_layer, + src_box->width, src_box->height); + if (ret) + return; + } } static void nvc0_clear_render_target(struct pipe_context *pipe, - struct pipe_surface *dst, - const float *rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) { struct nvc0_context *nv50 = nvc0_context(pipe); struct nvc0_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; struct nvc0_miptree *mt = nvc0_miptree(dst->texture); + struct nvc0_surface *sf = nvc0_surface(dst); struct nouveau_bo *bo = mt->base.bo; BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4); @@ -225,12 +240,12 @@ nvc0_clear_render_target(struct pipe_context *pipe, BEGIN_RING(chan, RING_3D(RT_CONTROL), 1); OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 8); - OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RING (chan, dst->width); - OUT_RING (chan, dst->height); + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); OUT_RING (chan, nvc0_format_table[dst->format].rt); - OUT_RING (chan, mt->level[dst->level].tile_mode); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); OUT_RING (chan, 1); OUT_RING (chan, 0); @@ -259,6 +274,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, struct nvc0_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; struct nvc0_miptree *mt = nvc0_miptree(dst->texture); + struct nvc0_surface *sf = nvc0_surface(dst); struct nouveau_bo *bo = mt->base.bo; uint32_t mode = 0; @@ -278,16 +294,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, return; 
BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); - OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCh(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, sf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RING (chan, nvc0_format_table[dst->format].rt); - OUT_RING (chan, mt->level[dst->level].tile_mode); + OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); - OUT_RING (chan, dst->width); - OUT_RING (chan, dst->height); + OUT_RING (chan, sf->width); + OUT_RING (chan, sf->height); OUT_RING (chan, (1 << 16) | 1); BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); @@ -353,7 +369,7 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers, void nvc0_init_surface_functions(struct nvc0_context *nvc0) { - nvc0->pipe.resource_copy_region = nvc0_surface_copy; + nvc0->pipe.resource_copy_region = nvc0_resource_copy_region; nvc0->pipe.clear_render_target = nvc0_clear_render_target; nvc0->pipe.clear_depth_stencil = nvc0_clear_depth_stencil; } diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c index 5f28b83282..bf6ffc7da9 100644 --- a/src/gallium/drivers/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -54,6 +54,7 @@ nvc0_create_sampler_view(struct pipe_context *pipe, const struct util_format_description *desc; uint32_t *tic; uint32_t swz[4]; + uint32_t depth; struct nvc0_tic_entry *view; struct nvc0_miptree *mt = nvc0_miptree(texture); @@ -101,7 +102,9 @@ nvc0_create_sampler_view(struct pipe_context *pipe, tic[2] |= ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) | - ((mt->base.bo->tile_mode & 0xf00) << (21 - 4)); + ((mt->base.bo->tile_mode & 0xf00) << (25 - 8)); + + depth = MAX2(mt->base.base.array_size, mt->base.base.depth0); switch (mt->base.base.target) { case PIPE_TEXTURE_1D: @@ -117,7 +120,17 @@ nvc0_create_sampler_view(struct pipe_context *pipe, tic[2] |= NV50_TIC_2_TARGET_3D; break; case PIPE_TEXTURE_CUBE: - tic[2] |= NV50_TIC_2_TARGET_CUBE; + depth /= 6; + if (depth > 1) + tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; + else + tic[2] |= NV50_TIC_2_TARGET_CUBE; + break; + case PIPE_TEXTURE_1D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; break; case PIPE_BUFFER: tic[2] |= NV50_TIC_2_TARGET_BUFFER | /* NV50_TIC_2_LINEAR */ (1 << 18); @@ -134,12 +147,12 @@ nvc0_create_sampler_view(struct pipe_context *pipe, tic[4] = (1 << 31) | mt->base.base.width0; tic[5] = mt->base.base.height0 & 0xffff; - tic[5] |= mt->base.base.depth0 << 16; + tic[5] |= depth << 16; tic[5] |= mt->base.base.last_level << 28; tic[6] = 0x03000000; - tic[7] = (view->pipe.last_level << 4) | view->pipe.first_level; + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; return &view->pipe; } diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 10d0995a5a..f6c78f239e 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -231,7 +231,7 @@ nvc0_m2mf_push_rect(struct pipe_screen *pscreen, struct pipe_transfer * nvc0_miptree_transfer_new(struct pipe_context *pctx, struct pipe_resource *res, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box) { @@ -239,16 +239,21 @@ nvc0_miptree_transfer_new(struct pipe_context 
*pctx, struct pipe_screen *pscreen = pctx->screen; struct nouveau_device *dev = nvc0->screen->base.device; struct nvc0_miptree *mt = nvc0_miptree(res); - struct nvc0_miptree_level *lvl = &mt->level[sr.level]; + struct nvc0_miptree_level *lvl = &mt->level[level]; struct nvc0_transfer *tx; - uint32_t image; - uint32_t w, h, z; + uint32_t size; + uint32_t w, h, d, z, layer; int ret; - if (res->target == PIPE_TEXTURE_CUBE) - image = sr.face; - else - image = 0; + if (mt->layout_3d) { + z = box->z; + d = u_minify(res->depth0, level); + layer = 0; + } else { + z = 0; + d = 1; + layer = box->z; + } tx = CALLOC_STRUCT(nvc0_transfer); if (!tx) @@ -256,7 +261,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, pipe_resource_reference(&tx->base.resource, res); - tx->base.sr = sr; + tx->base.level = level; tx->base.usage = usage; tx->base.box = *box; @@ -265,30 +270,27 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); - w = u_minify(res->width0, sr.level); - h = u_minify(res->height0, sr.level); + w = u_minify(res->width0, level); + h = u_minify(res->height0, level); tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format); tx->rect[0].bo = mt->base.bo; - tx->rect[0].base = lvl->image_offset[image]; + tx->rect[0].base = lvl->offset + layer * mt->layer_stride; tx->rect[0].tile_mode = lvl->tile_mode; tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); - tx->rect[0].y = util_format_get_nblocksx(res->format, box->y); - tx->rect[0].z = box->z; + tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); + tx->rect[0].z = z; tx->rect[0].width = util_format_get_nblocksx(res->format, w); - tx->rect[0].height = util_format_get_nblocksx(res->format, h); - tx->rect[0].depth = res->depth0; + tx->rect[0].height = util_format_get_nblocksy(res->format, h); + tx->rect[0].depth = d; tx->rect[0].pitch = lvl->pitch; tx->rect[0].domain = NOUVEAU_BO_VRAM; - if (!(usage & PIPE_TRANSFER_READ) && - (res->depth0 == 1) && (tx->nblocksy * tx->base.stride < 512 * 4)) { - /* don't allocate scratch buffer, upload through FIFO */ - } + size = tx->nblocksy * tx->base.stride; ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - tx->nblocksy * tx->base.stride, &tx->rect[1].bo); + size * tx->base.box.depth, &tx->rect[1].bo); if (ret) { FREE(tx); return NULL; @@ -296,18 +298,23 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[1].width = tx->nblocksx; tx->rect[1].height = tx->nblocksy; - tx->rect[1].depth = box->depth; + tx->rect[1].depth = 1; tx->rect[1].pitch = tx->base.stride; tx->rect[1].domain = NOUVEAU_BO_GART; if (usage & PIPE_TRANSFER_READ) { - for (z = 0; z < box->depth; ++z) { + unsigned i; + for (i = 0; i < box->depth; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], tx->nblocksx, tx->nblocksy); - tx->rect[0].z++; + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += size; } } - tx->rect[0].z = box->z; + tx->rect[0].z = z; return &tx->base; } @@ -318,13 +325,18 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx, { struct pipe_screen *pscreen = pctx->screen; struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer; - unsigned z; + struct nvc0_miptree *mt = nvc0_miptree(tx->base.resource); + unsigned i; if (tx->base.usage & PIPE_TRANSFER_WRITE) { - for (z = 0; z < tx->base.box.depth; ++z) { + for (i = 0; i < tx->base.box.depth; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], 
tx->nblocksx, tx->nblocksy); - tx->rect[0].z++; + if (mt->layout_3d) + tx->rect[0].z++; + else + tx->rect[0].base += mt->layer_stride; + tx->rect[1].base += tx->nblocksy * tx->base.stride; } } diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h index aaebe408b5..222f72d274 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.h +++ b/src/gallium/drivers/nvc0/nvc0_transfer.h @@ -7,7 +7,7 @@ struct pipe_transfer * nvc0_miptree_transfer_new(struct pipe_context *pcontext, struct pipe_resource *pt, - struct pipe_subresource sr, + unsigned level, unsigned usage, const struct pipe_box *box); void -- cgit v1.2.3 From 96def0c3147ac7b9449de00bf1306dfd1ca436fb Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 23 Dec 2010 15:21:36 +0100 Subject: nvc0: fix layer stride state --- src/gallium/drivers/nvc0/nvc0_miptree.c | 29 ++++++++++++++++---------- src/gallium/drivers/nvc0/nvc0_resource.h | 15 ++++++++----- src/gallium/drivers/nvc0/nvc0_state_validate.c | 4 ++-- 3 files changed, 30 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_resource.h') diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index 70b10a0fc3..7c7e134146 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -58,17 +58,18 @@ get_tile_dims(unsigned nx, unsigned ny, unsigned nz) } static INLINE unsigned -get_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh) +calc_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh) { unsigned tile_h = NVC0_TILE_HEIGHT(tile_mode); - unsigned tile_d = NVC0_TILE_DEPTH(tile_mode); + unsigned tile_d_shift = NVC0_TILE_DIM_SHIFT(tile_mode, 2); + unsigned tile_d = 1 << tile_d_shift; - /* pitch_2d == to next slice within this volume tile */ - /* pitch_3d == size (in bytes) of a volume tile */ - unsigned pitch_2d = tile_h * NVC0_TILE_PITCH(tile_mode); - unsigned pitch_3d = tile_d * align(nbh, tile_h) * pitch; + /* stride_2d == to next slice within this volume tile */ + /* stride_3d == size (in bytes) of a volume tile */ + unsigned stride_2d = tile_h * NVC0_TILE_PITCH(tile_mode); + unsigned stride_3d = tile_d * align(nbh, tile_h) * pitch; - return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d; + return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d; } static void @@ -298,10 +299,16 @@ nvc0_miptree_surface_new(struct pipe_context *pipe, ps->height = ns->height; if (mt->layout_3d) { - ns->offset += get_zslice_offset(lvl->tile_mode, ps->u.tex.first_layer, - lvl->pitch, - util_format_get_nblocksy(pt->format, - ns->height)); + unsigned zslice = ps->u.tex.first_layer; + + /* TODO: re-layout the texture to use only depth 1 tiles in this case: */ + if (ns->depth > 1 && (zslice & (NVC0_TILE_DEPTH(lvl->tile_mode) - 1))) + NOUVEAU_ERR("Creating unsupported 3D surface of slices [%u:%u].\n", + zslice, ps->u.tex.last_layer); + + ns->offset += calc_zslice_offset(lvl->tile_mode, zslice, lvl->pitch, + util_format_get_nblocksy(pt->format, + ns->height)); } else { ns->offset += mt->layer_stride * ps->u.tex.first_layer; } diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 0ff7a03198..9384f1905d 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -66,12 +66,17 @@ nvc0_resource_unmap(struct nvc0_resource *res) nouveau_bo_unmap(res->bo); } -#define NVC0_TILE_PITCH(m) (64 << ((m) & 0xf)) -#define 
NVC0_TILE_HEIGHT(m) (8 << (((m) >> 4) & 0xf)) -#define NVC0_TILE_DEPTH(m) (1 << ((m) >> 8)) +#define NVC0_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf) -#define NVC0_TILE_SIZE(m) \ - (NVC0_TILE_PITCH(m) * NVC0_TILE_HEIGHT(m) * NVC0_TILE_DEPTH(m)) +#define NVC0_TILE_PITCH(m) (64 << NVC0_TILE_DIM_SHIFT(m, 0)) +#define NVC0_TILE_HEIGHT(m) ( 8 << NVC0_TILE_DIM_SHIFT(m, 1)) +#define NVC0_TILE_DEPTH(m) ( 1 << NVC0_TILE_DIM_SHIFT(m, 2)) + +#define NVC0_TILE_SIZE_2D(m) (((64 * 8) << \ + NVC0_TILE_DIM_SHIFT(m, 0)) << \ + NVC0_TILE_DIM_SHIFT(m, 1)) + +#define NVC0_TILE_SIZE(m) (NVC0_TILE_SIZE_2D(m) << NVC0_TILE_DIM_SHIFT(m, 2)) struct nvc0_miptree_level { uint32_t offset; diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index f20e7e962c..25aec0244d 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -81,7 +81,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, (mt->layout_3d << 16) | mt->level[sf->base.u.tex.level].tile_mode); OUT_RING (chan, sf->depth); - OUT_RING (chan, mt->layer_stride); + OUT_RING (chan, mt->layer_stride >> 2); nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ -99,7 +99,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RING (chan, nvc0_format_table[fb->zsbuf->format].rt); OUT_RING (chan, mt->level[sf->base.u.tex.level].tile_mode); - OUT_RING (chan, 0); + OUT_RING (chan, mt->layer_stride >> 2); BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3); -- cgit v1.2.3 From e4349027f6842563555992a39add4d0b2283fbbb Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 27 Dec 2010 13:57:46 +0100 Subject: nvc0: implement VRAM buffer transfers with bounce buffers --- src/gallium/drivers/nvc0/nvc0_buffer.c | 302 ++++++++++++++++++++++++------- src/gallium/drivers/nvc0/nvc0_context.c | 35 ++-- src/gallium/drivers/nvc0/nvc0_context.h | 6 +- src/gallium/drivers/nvc0/nvc0_fence.c | 11 ++ src/gallium/drivers/nvc0/nvc0_fence.h | 1 + src/gallium/drivers/nvc0/nvc0_push.c | 10 +- src/gallium/drivers/nvc0/nvc0_resource.h | 58 ++++-- src/gallium/drivers/nvc0/nvc0_screen.c | 3 +- src/gallium/drivers/nvc0/nvc0_screen.h | 18 +- src/gallium/drivers/nvc0/nvc0_state.c | 4 + src/gallium/drivers/nvc0/nvc0_tex.c | 2 - src/gallium/drivers/nvc0/nvc0_winsys.h | 2 + 12 files changed, 336 insertions(+), 116 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_resource.h') diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index 93d7f5d303..8021e4348b 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -11,7 +11,15 @@ #include "nvc0_context.h" #include "nvc0_resource.h" -#define NVC0_BUFFER_STATUS_USER_MEMORY 0xff +struct nvc0_transfer { + struct pipe_transfer base; +}; + +static INLINE struct nvc0_transfer * +nvc0_transfer(struct pipe_transfer *transfer) +{ + return (struct nvc0_transfer *)transfer; +} static INLINE boolean nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf, @@ -28,12 +36,13 @@ nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf, &buf->offset); if (!buf->bo) return FALSE; - } else { - assert(!domain); - if (!buf->data) + } + if (domain != NOUVEAU_BO_GART) { + if (!buf->data) { buf->data = MALLOC(buf->base.width0); - if (!buf->data) - return FALSE; + if 
(!buf->data) + return FALSE; + } } buf->domain = domain; return TRUE; @@ -59,68 +68,199 @@ nvc0_buffer_destroy(struct pipe_screen *pscreen, if (res->mm) release_allocation(&res->mm, screen->fence.current); - if (res->status != NVC0_BUFFER_STATUS_USER_MEMORY && res->data) + if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) FREE(res->data); FREE(res); } -static INLINE uint32_t -nouveau_buffer_rw_flags(unsigned pipe) +/* Maybe just migrate to GART right away if we actually need to do this. */ +boolean +nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf, + unsigned start, unsigned size) +{ + struct nvc0_mm_allocation *mm; + struct nouveau_bo *bounce = NULL; + uint32_t offset; + + assert(buf->domain == NOUVEAU_BO_VRAM); + + mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset); + if (!bounce) + return FALSE; + + nvc0_m2mf_copy_linear(nvc0, bounce, offset, NOUVEAU_BO_GART, + buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, + size); + + if (nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD)) + return FALSE; + memcpy(buf->data + start, bounce->map, size); + nouveau_bo_unmap(bounce); + + buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + + nouveau_bo_ref(NULL, &bounce); + if (mm) + nvc0_mm_free(mm); + return TRUE; +} + +static boolean +nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf, + unsigned start, unsigned size) { - uint32_t flags = 0; + struct nvc0_mm_allocation *mm; + struct nouveau_bo *bounce = NULL; + uint32_t offset; - if (pipe & PIPE_TRANSFER_READ) - flags = NOUVEAU_BO_RD; - if (pipe & PIPE_TRANSFER_WRITE) - flags |= NOUVEAU_BO_WR; + mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset); + if (!bounce) + return FALSE; + + nouveau_bo_map_range(bounce, offset, size, + NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + memcpy(bounce->map, buf->data + start, size); + nouveau_bo_unmap(bounce); + + nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM, + bounce, offset, NOUVEAU_BO_GART, size); + + nouveau_bo_ref(NULL, &bounce); + if (mm) + release_allocation(&mm, nvc0->screen->fence.current); + + if (start == 0 && size == buf->base.width0) + buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + return TRUE; +} + +static struct pipe_transfer * +nvc0_buffer_transfer_get(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct nvc0_resource *buf = nvc0_resource(resource); + struct nvc0_transfer *xfr = CALLOC_STRUCT(nvc0_transfer); + if (!xfr) + return NULL; + + xfr->base.resource = resource; + xfr->base.box.x = box->x; + xfr->base.box.width = box->width; + xfr->base.usage = usage; + + if (buf->domain == NOUVEAU_BO_VRAM) { + if (usage & PIPE_TRANSFER_READ) { + if (buf->status & NVC0_BUFFER_STATUS_DIRTY) + nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0); + } + } + + return &xfr->base; +} + +static void +nvc0_buffer_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct nvc0_resource *buf = nvc0_resource(transfer->resource); + struct nvc0_transfer *xfr = nvc0_transfer(transfer); + + if (xfr->base.usage & PIPE_TRANSFER_WRITE) { + /* writing is worse */ + nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -5000); + + if (buf->domain == NOUVEAU_BO_VRAM) { + nvc0_buffer_upload(nvc0_context(pipe), buf, + transfer->box.x, transfer->box.width); + } + + if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER))) + nvc0_context(pipe)->vbo_dirty = TRUE; + 
} - return flags; + FREE(xfr); +} + +static INLINE boolean +nvc0_buffer_sync(struct nvc0_resource *buf, unsigned rw) +{ + if (rw == PIPE_TRANSFER_READ) { + if (!buf->fence_wr) + return TRUE; + if (!nvc0_fence_wait(buf->fence_wr)) + return FALSE; + } else { + if (!buf->fence) + return TRUE; + if (!nvc0_fence_wait(buf->fence)) + return FALSE; + + nvc0_fence_reference(&buf->fence, NULL); + } + nvc0_fence_reference(&buf->fence_wr, NULL); + + return TRUE; +} + +static INLINE boolean +nvc0_buffer_busy(struct nvc0_resource *buf, unsigned rw) +{ + if (rw == PIPE_TRANSFER_READ) + return (buf->fence_wr && !nvc0_fence_signalled(buf->fence_wr)); + else + return (buf->fence && !nvc0_fence_signalled(buf->fence)); } static void * nvc0_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct nvc0_resource *res = nvc0_resource(transfer->resource); - struct nvc0_fence *fence; + struct nvc0_transfer *xfr = nvc0_transfer(transfer); + struct nvc0_resource *buf = nvc0_resource(transfer->resource); + struct nouveau_bo *bo = buf->bo; uint8_t *map; int ret; - uint32_t flags = nouveau_buffer_rw_flags(transfer->usage); + uint32_t offset = xfr->base.box.x; + uint32_t flags; - if ((res->base.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) && - (flags & NOUVEAU_BO_WR)) - nvc0_context(pipe)->vbo_dirty = TRUE; + nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -250); - if (res->domain == 0) - return res->data + transfer->box.x; + if (buf->domain != NOUVEAU_BO_GART) + return buf->data + offset; - if (res->domain == NOUVEAU_BO_VRAM) { - NOUVEAU_ERR("transfers to/from VRAM buffers are not allowed\n"); - /* if this happens, migrate back to GART */ - return NULL; - } + if (buf->mm) + flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR; + else + flags = nouveau_screen_transfer_flags(xfr->base.usage); - if (res->score > -1024) - --res->score; + offset += buf->offset; - ret = nouveau_bo_map(res->bo, flags | NOUVEAU_BO_NOSYNC); + ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags); if (ret) return NULL; - map = res->bo->map; - nouveau_bo_unmap(res->bo); - - fence = (flags == NOUVEAU_BO_RD) ? res->fence_wr : res->fence; - - if (fence) { - if (nvc0_fence_wait(fence) == FALSE) - NOUVEAU_ERR("failed to fence buffer\n"); - - nvc0_fence_reference(&res->fence, NULL); - nvc0_fence_reference(&res->fence_wr, NULL); + map = bo->map; + + /* Unmap right now. Since multiple buffers can share a single nouveau_bo, + * not doing so might make future maps fail or trigger "reloc while mapped" + * errors. For now, mappings to userspace are guaranteed to be persistent. 
+ */ + nouveau_bo_unmap(bo); + + if (buf->mm) { + if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) { + if (nvc0_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE)) + return NULL; + } else + if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + nvc0_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE); + } } - - return map + transfer->box.x + res->offset; + return map; } @@ -131,26 +271,23 @@ nvc0_buffer_transfer_flush_region(struct pipe_context *pipe, const struct pipe_box *box) { struct nvc0_resource *res = nvc0_resource(transfer->resource); + struct nouveau_bo *bo = res->bo; + unsigned offset = res->offset + transfer->box.x + box->x; - if (!res->bo) + /* not using non-snoop system memory yet, no need for cflush */ + if (1) return; - nouveau_screen_bo_map_flush_range(pipe->screen, - res->bo, - res->offset + transfer->box.x + box->x, - box->width); + /* XXX: maybe need to upload for VRAM buffers here */ + + nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width); } static void nvc0_buffer_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct nvc0_resource *res = nvc0_resource(transfer->resource); - - if (res->data) - return; - - /* nouveau_screen_bo_unmap(pipe->screen, res->bo); */ + /* we've called nouveau_bo_unmap right after map */ } const struct u_resource_vtbl nvc0_buffer_vtbl = @@ -158,8 +295,8 @@ const struct u_resource_vtbl nvc0_buffer_vtbl = u_default_resource_get_handle, /* get_handle */ nvc0_buffer_destroy, /* resource_destroy */ NULL, /* is_resource_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ + nvc0_buffer_transfer_get, /* get_transfer */ + nvc0_buffer_transfer_destroy, /* transfer_destroy */ nvc0_buffer_transfer_map, /* transfer_map */ nvc0_buffer_transfer_flush_region, /* transfer_flush_region */ nvc0_buffer_transfer_unmap, /* transfer_unmap */ @@ -227,6 +364,23 @@ nvc0_user_buffer_create(struct pipe_screen *pscreen, return &buffer->base; } +static INLINE boolean +nvc0_buffer_fetch_data(struct nvc0_resource *buf, + struct nouveau_bo *bo, unsigned offset, unsigned size) +{ + if (!buf->data) { + buf->data = MALLOC(size); + if (!buf->data) + return FALSE; + } + if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD)) + return FALSE; + memcpy(buf->data, bo->map, size); + nouveau_bo_unmap(bo); + + return TRUE; +} + /* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. 
*/ boolean nvc0_buffer_migrate(struct nvc0_context *nvc0, @@ -235,38 +389,52 @@ nvc0_buffer_migrate(struct nvc0_context *nvc0, struct nvc0_screen *screen = nvc0_screen(buf->base.screen); struct nouveau_bo *bo; unsigned size = buf->base.width0; + unsigned offset; int ret; + assert(domain != buf->domain); + if (domain == NOUVEAU_BO_GART && buf->domain == 0) { if (!nvc0_buffer_allocate(screen, buf, domain)) return FALSE; - ret = nouveau_bo_map(buf->bo, NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); + ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR | + NOUVEAU_BO_NOSYNC); if (ret) return ret; - memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size); + memcpy(buf->bo->map, buf->data, size); nouveau_bo_unmap(buf->bo); + FREE(buf->data); } else - if (domain == NOUVEAU_BO_VRAM && buf->domain == NOUVEAU_BO_GART) { + if (domain != 0 && buf->domain != 0) { struct nvc0_mm_allocation *mm = buf->mm; + if (domain == NOUVEAU_BO_VRAM) { + /* keep a system memory copy of our data in case we hit a fallback */ + if (!nvc0_buffer_fetch_data(buf, buf->bo, buf->offset, size)) + return FALSE; + debug_printf("migrating %u KiB to VRAM\n", size / 1024); + } + + offset = buf->offset; bo = buf->bo; buf->bo = NULL; buf->mm = NULL; nvc0_buffer_allocate(screen, buf, domain); - nvc0_m2mf_copy_linear(nvc0, buf->bo, 0, NOUVEAU_BO_VRAM, - bo, 0, NOUVEAU_BO_GART, buf->base.width0); + nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset, domain, + bo, offset, buf->domain, buf->base.width0); - release_allocation(&mm, screen->fence.current); nouveau_bo_ref(NULL, &bo); + if (mm) + release_allocation(&mm, screen->fence.current); } else if (domain == NOUVEAU_BO_VRAM && buf->domain == 0) { - /* should use a scratch buffer instead here */ - if (!nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART)) + if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM)) + return FALSE; + if (!nvc0_buffer_upload(nvc0, buf, 0, buf->base.width0)) return FALSE; - return nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_VRAM); } else - return -1; + return FALSE; buf->domain = domain; diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index d41ee297d4..b2b4fd62ee 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -104,7 +104,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) } struct resident { - struct nouveau_bo *bo; + struct nvc0_resource *res; uint32_t flags; }; @@ -112,12 +112,14 @@ void nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx, struct nvc0_resource *resource, uint32_t flags) { - struct resident rsd = { NULL, flags }; + struct resident rsd = { resource, flags }; if (!resource->bo) return; - nouveau_bo_ref(resource->bo, &rsd.bo); + /* We don't need to reference the resource here, it will be referenced + * in the context/state, and bufctx will be reset when state changes. 
+ */ util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd); } @@ -125,35 +127,24 @@ void nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx, struct nvc0_resource *resource) { - struct resident *rsd, rem; + struct resident *rsd, *top; unsigned i; for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) { rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i); - if (rsd->bo == resource->bo) { - rem = util_dynarray_pop(&nvc0->residents[ctx], struct resident); - nouveau_bo_ref(NULL, &rem.bo); + if (rsd->res == resource) { + top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident); + if (rsd != top) + *rsd = *top; break; } } } -void -nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) -{ - unsigned i; - - for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) - nouveau_bo_ref(NULL, &util_dynarray_element(&nvc0->residents[ctx], - struct resident, i)->bo); - util_dynarray_resize(&nvc0->residents[ctx], 0); -} - void nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) { - struct nouveau_channel *chan = nvc0->screen->base.channel; struct resident *rsd; struct util_dynarray *array; unsigned ctx, i; @@ -164,11 +155,9 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) for (i = 0; i < array->size / sizeof(struct resident); ++i) { rsd = util_dynarray_element(array, struct resident, i); - nouveau_bo_validate(chan, rsd->bo, rsd->flags); + nvc0_resource_validate(rsd->res, rsd->flags); } } - nouveau_bo_validate(chan, nvc0->screen->text, NOUVEAU_BO_RD); - nouveau_bo_validate(chan, nvc0->screen->uniforms, NOUVEAU_BO_RD); - nouveau_bo_validate(chan, nvc0->screen->txc, NOUVEAU_BO_RD); + nvc0_screen_make_buffers_resident(nvc0->screen); } diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 962a2c0cf2..83aff0a585 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -147,11 +147,15 @@ nvc0_surface(struct pipe_surface *ps) struct pipe_context *nvc0_create(struct pipe_screen *, void *); void nvc0_bufctx_emit_relocs(struct nvc0_context *); -void nvc0_bufctx_reset(struct nvc0_context *, int ctx); void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, struct nvc0_resource *, uint32_t flags); void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx, struct nvc0_resource *); +static INLINE void +nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx) +{ + util_dynarray_resize(&nvc0->residents[ctx], 0); +} /* nvc0_draw.c */ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index dc2abe45bd..0387c5940b 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -138,6 +138,17 @@ nvc0_screen_fence_update(struct nvc0_screen *screen) #define NVC0_FENCE_MAX_SPINS (1 << 17) +boolean +nvc0_fence_signalled(struct nvc0_fence *fence) +{ + struct nvc0_screen *screen = fence->screen; + + if (fence->state == NVC0_FENCE_STATE_EMITTED) + nvc0_screen_fence_update(screen); + + return fence->state == NVC0_FENCE_STATE_SIGNALLED; +} + boolean nvc0_fence_wait(struct nvc0_fence *fence) { diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h index 7b31f28808..e63c164bda 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.h +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -24,6 +24,7 @@ void nvc0_fence_emit(struct nvc0_fence *); void nvc0_fence_del(struct nvc0_fence *); 
boolean nvc0_fence_wait(struct nvc0_fence *); +boolean nvc0_fence_signalled(struct nvc0_fence *); static INLINE void nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence) diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 4bf259c646..779a477599 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -215,7 +215,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; struct nvc0_resource *res = nvc0_resource(vb->buffer); - data = nvc0_resource_map_offset(res, vb->buffer_offset, NOUVEAU_BO_RD); + data = nvc0_resource_map_offset(nvc0, res, + vb->buffer_offset, NOUVEAU_BO_RD); if (info->indexed) data += info->index_bias * vb->stride; @@ -223,12 +224,11 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) } if (info->indexed) { - ctx.idxbuf = pipe_buffer_map(&nvc0->pipe, nvc0->idxbuf.buffer, - PIPE_TRANSFER_READ, &transfer); + ctx.idxbuf = nvc0_resource_map_offset(nvc0, + nvc0_resource(nvc0->idxbuf.buffer), + nvc0->idxbuf.offset, NOUVEAU_BO_RD); if (!ctx.idxbuf) return; - ctx.idxbuf = (uint8_t *)ctx.idxbuf + nvc0->idxbuf.offset; - index_size = nvc0->idxbuf.index_size; ctx.primitive_restart = info->primitive_restart; ctx.restart_index = info->restart_index; diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 9384f1905d..0ffb9e8fa6 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -12,6 +12,14 @@ struct pipe_resource; struct nouveau_bo; +struct nvc0_context; + +#define NVC0_BUFFER_SCORE_MIN -25000 +#define NVC0_BUFFER_SCORE_MAX 25000 +#define NVC0_BUFFER_SCORE_VRAM_THRESHOLD 20000 + +#define NVC0_BUFFER_STATUS_DIRTY (1 << 0) +#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7) /* Resources, if mapped into the GPU's address space, are guaranteed to * have constant virtual addresses. 
@@ -21,7 +29,6 @@ struct nouveau_bo; struct nvc0_resource { struct pipe_resource base; const struct u_resource_vtbl *vtbl; - uint64_t address; uint8_t *data; struct nouveau_bo *bo; @@ -38,22 +45,55 @@ struct nvc0_resource { struct nvc0_mm_allocation *mm; }; +boolean +nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *, + unsigned start, unsigned size); + +boolean +nvc0_buffer_migrate(struct nvc0_context *, + struct nvc0_resource *, unsigned domain); + +static INLINE void +nvc0_buffer_adjust_score(struct nvc0_context *nvc0, struct nvc0_resource *res, + int16_t score) +{ + if (score < 0) { + if (res->score > NVC0_BUFFER_SCORE_MIN) + res->score += score; + } else + if (score > 0){ + if (res->score < NVC0_BUFFER_SCORE_MAX) + res->score += score; + if (res->domain == NOUVEAU_BO_GART && + res->score > NVC0_BUFFER_SCORE_VRAM_THRESHOLD) + nvc0_buffer_migrate(nvc0, res, NOUVEAU_BO_VRAM); + } +} + /* XXX: wait for fence (atm only using this for vertex push) */ static INLINE void * -nvc0_resource_map_offset(struct nvc0_resource *res, uint32_t offset, +nvc0_resource_map_offset(struct nvc0_context *nvc0, + struct nvc0_resource *res, uint32_t offset, uint32_t flags) { void *map; - if (res->domain == 0) + nvc0_buffer_adjust_score(nvc0, res, -250); + + if ((res->domain == NOUVEAU_BO_VRAM) && + (res->status & NVC0_BUFFER_STATUS_DIRTY)) + nvc0_buffer_download(nvc0, res, 0, res->base.width0); + + if (res->domain != NOUVEAU_BO_GART) return res->data + offset; + if (res->mm) + flags |= NOUVEAU_BO_NOSYNC; + if (nouveau_bo_map_range(res->bo, res->offset + offset, - res->base.width0, flags | NOUVEAU_BO_NOSYNC)) + res->base.width0, flags)) return NULL; - /* With suballocation, the same bo can be mapped several times, so unmap - * immediately. Maps are guaranteed to persist. 
*/ map = res->bo->map; nouveau_bo_unmap(res->bo); return map; @@ -149,12 +189,6 @@ nvc0_miptree_surface_new(struct pipe_context *, void nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *); -struct nvc0_context; - -boolean -nvc0_buffer_migrate(struct nvc0_context *, - struct nvc0_resource *, unsigned domain); - boolean nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 4ec73b07aa..0e80e2841a 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -248,7 +248,7 @@ nvc0_screen_fence_signalled(struct pipe_screen *pscreen, struct pipe_fence_handle *fence, unsigned flags) { - return !(((struct nvc0_fence *)fence)->state == NVC0_FENCE_STATE_SIGNALLED); + return !(nvc0_fence_signalled(nvc0_fence(fence))); } static int @@ -622,6 +622,7 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; nouveau_bo_validate(chan, screen->text, flags); + nouveau_bo_validate(chan, screen->uniforms, flags); nouveau_bo_validate(chan, screen->txc, flags); nouveau_bo_validate(chan, screen->tls, flags); nouveau_bo_validate(chan, screen->mp_stack_bo, flags); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 5b1b623356..efa5ff63f1 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -101,18 +101,26 @@ int nvc0_screen_tic_alloc(struct nvc0_screen *, void *); int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); static INLINE void -nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) +nvc0_resource_fence(struct nvc0_resource *res, uint32_t flags) { struct nvc0_screen *screen = nvc0_screen(res->base.screen); - assert(res->mm); + if (res->mm) { + nvc0_fence_reference(&res->fence, screen->fence.current); - nvc0_fence_reference(&res->fence, screen->fence.current); + if (flags & NOUVEAU_BO_WR) + nvc0_fence_reference(&res->fence_wr, screen->fence.current); + } +} - if (flags & NOUVEAU_BO_WR) - nvc0_fence_reference(&res->fence_wr, screen->fence.current); +static INLINE void +nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) +{ + struct nvc0_screen *screen = nvc0_screen(res->base.screen); nouveau_bo_validate(screen->base.channel, res->bo, flags); + + nvc0_resource_fence(res, flags); } diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 62abaa7577..e77e95625b 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -539,6 +539,8 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, nvc0->num_textures[s] = nr; + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES); + nvc0->dirty |= NVC0_NEW_TEXTURES; } @@ -773,6 +775,8 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count); nvc0->num_vtxbufs = count; + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + nvc0->dirty |= NVC0_NEW_ARRAYS; } diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c index c9f929b942..b219f82c90 100644 --- a/src/gallium/drivers/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -218,8 +218,6 @@ void nvc0_validate_textures(struct nvc0_context *nvc0) { boolean need_flush; - nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES); - need_flush = nvc0_validate_tic(nvc0, 0); need_flush |= nvc0_validate_tic(nvc0, 4); diff --git 
a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h index af7711031f..1544fb7a1d 100644 --- a/src/gallium/drivers/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -94,6 +94,8 @@ static INLINE int OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, unsigned delta, unsigned flags) { + if (flags & NOUVEAU_BO_WR) + res->status |= NVC0_BUFFER_STATUS_DIRTY; return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); } -- cgit v1.2.3 From c024c1d75fdce72fe2de2d6b987b796fc9561115 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 2 Jan 2011 22:39:50 +0100 Subject: nvc0: fix resource unmap after vertex push --- src/gallium/drivers/nvc0/nvc0_push.c | 11 +++-------- src/gallium/drivers/nvc0/nvc0_push2.c | 4 ++++ src/gallium/drivers/nvc0/nvc0_resource.h | 3 +-- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_resource.h') diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 779a477599..ccbb776447 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -201,7 +201,6 @@ void nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) { struct push_context ctx; - struct pipe_transfer *transfer = NULL; unsigned i, index_size; unsigned inst = info->instance_count; @@ -267,12 +266,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) } if (info->indexed) - pipe_buffer_unmap(&nvc0->pipe, transfer); + nvc0_resource_unmap(nvc0_resource(nvc0->idxbuf.buffer)); - for (i = 0; i < nvc0->num_vtxbufs; ++i) { - struct nvc0_resource *res = nvc0_resource(nvc0->vtxbuf[i].buffer); - - if (res->bo) - nouveau_bo_unmap(res->bo); - } + for (i = 0; i < nvc0->num_vtxbufs; ++i) + nvc0_resource_unmap(nvc0_resource(nvc0->vtxbuf[i].buffer)); } diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c index 1f4ba256c1..6f51600558 100644 --- a/src/gallium/drivers/nvc0/nvc0_push2.c +++ b/src/gallium/drivers/nvc0/nvc0_push2.c @@ -1,4 +1,6 @@ +#if 0 /* not used, kept for now to compare with util/translate */ + #include "pipe/p_context.h" #include "pipe/p_state.h" #include "util/u_inlines.h" @@ -327,3 +329,5 @@ nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info) prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } } + +#endif diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 0ffb9e8fa6..d33e2f0ed0 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -102,8 +102,7 @@ nvc0_resource_map_offset(struct nvc0_context *nvc0, static INLINE void nvc0_resource_unmap(struct nvc0_resource *res) { - if (res->domain != 0 && 0) - nouveau_bo_unmap(res->bo); + /* no-op */ } #define NVC0_TILE_DIM_SHIFT(m, d) (((m) >> (d * 4)) & 0xf) -- cgit v1.2.3
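
The tile_mode word used throughout the miptree patches packs one 4-bit shift per dimension: NVC0_TILE_DIM_SHIFT(m, d) extracts the shift for dimension d, giving a tile of (64 << sx) bytes by (8 << sy) rows by (1 << sz) slices, and calc_zslice_offset() uses the depth shift to find a z-slice inside a tiled 3D level. Below is a minimal standalone sketch of that arithmetic, not driver code: the macros and offset formula are taken from the patches above, while the sample tile_mode, pitch and block-row count are invented for illustration.

/*
 * Standalone sketch (not driver code): how the NVC0_TILE_* macros from
 * nvc0_resource.h decode a tile_mode word, and how calc_zslice_offset()
 * from nvc0_miptree.c locates a z-slice inside a tiled 3D level.
 * The sample tile_mode, pitch and block-row count below are invented.
 */
#include <stdint.h>
#include <stdio.h>

#define NVC0_TILE_DIM_SHIFT(m, d) (((m) >> ((d) * 4)) & 0xf)

#define NVC0_TILE_PITCH(m)  (64 << NVC0_TILE_DIM_SHIFT(m, 0))
#define NVC0_TILE_HEIGHT(m) ( 8 << NVC0_TILE_DIM_SHIFT(m, 1))
#define NVC0_TILE_DEPTH(m)  ( 1 << NVC0_TILE_DIM_SHIFT(m, 2))

/* align to a power-of-two boundary, standing in for Mesa's align() */
static unsigned
align_pot(unsigned x, unsigned a)
{
   return (x + a - 1) & ~(a - 1);
}

/* same arithmetic as calc_zslice_offset() in the layer-stride fix */
static unsigned
calc_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh)
{
   unsigned tile_h = NVC0_TILE_HEIGHT(tile_mode);
   unsigned tile_d_shift = NVC0_TILE_DIM_SHIFT(tile_mode, 2);
   unsigned tile_d = 1 << tile_d_shift;

   /* stride_2d: to the next slice inside one volume tile */
   unsigned stride_2d = tile_h * NVC0_TILE_PITCH(tile_mode);
   /* stride_3d: size in bytes of one full volume tile */
   unsigned stride_3d = tile_d * align_pot(nbh, tile_h) * pitch;

   return (z & (tile_d - 1)) * stride_2d + (z >> tile_d_shift) * stride_3d;
}

int
main(void)
{
   uint32_t tile_mode = 0x210; /* hypothetical: 64 B x 16 rows x 4 slices */
   unsigned pitch = 512;       /* bytes per row of blocks (hypothetical) */
   unsigned nbh = 64;          /* rows of blocks in this level (hypothetical) */
   unsigned z;

   printf("tile: %u B x %u rows x %u slices\n",
          (unsigned)NVC0_TILE_PITCH(tile_mode),
          (unsigned)NVC0_TILE_HEIGHT(tile_mode),
          (unsigned)NVC0_TILE_DEPTH(tile_mode));

   for (z = 0; z < 8; ++z)
      printf("z = %u -> byte offset %u\n",
             z, calc_zslice_offset(tile_mode, z, pitch, nbh));
   return 0;
}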
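
The bounce-buffer series also introduces a per-buffer score that decides when a GART buffer is worth migrating to VRAM: CPU maps and transfer write-backs subtract from the score, GPU use adds to it, and nvc0_buffer_adjust_score() triggers nvc0_buffer_migrate() once a GART buffer climbs past NVC0_BUFFER_SCORE_VRAM_THRESHOLD. The sketch below simulates that policy outside the driver; the constants and clamping mirror nvc0_resource.h, but the event sequence and the +1000 per-draw increment are made up for illustration.

/*
 * Standalone sketch (not driver code) of the score-driven buffer placement
 * policy from nvc0_resource.h: negative adjustments model CPU access,
 * positive ones model GPU use, and a GART buffer whose score passes the
 * VRAM threshold gets migrated.  Constants and clamping mirror the patch;
 * the event sequence and per-draw increment are invented.
 */
#include <stdio.h>

#define SCORE_MIN            (-25000)
#define SCORE_MAX             25000
#define SCORE_VRAM_THRESHOLD  20000

enum domain { DOMAIN_GART, DOMAIN_VRAM };

struct buffer {
   int score;
   enum domain domain;
};

static void
adjust_score(struct buffer *buf, int delta)
{
   if (delta < 0) {
      if (buf->score > SCORE_MIN)
         buf->score += delta;
   } else if (delta > 0) {
      if (buf->score < SCORE_MAX)
         buf->score += delta;
      if (buf->domain == DOMAIN_GART && buf->score > SCORE_VRAM_THRESHOLD) {
         buf->domain = DOMAIN_VRAM; /* nvc0_buffer_migrate() in the driver */
         printf("  migrated to VRAM at score %d\n", buf->score);
      }
   }
}

int
main(void)
{
   struct buffer vbo = { 0, DOMAIN_GART };
   int frame;

   adjust_score(&vbo, -250);      /* CPU map, as in transfer_map */
   for (frame = 0; frame < 25; ++frame)
      adjust_score(&vbo, 1000);   /* hypothetical per-draw GPU use */
   adjust_score(&vbo, -5000);     /* CPU write-back, as in transfer_destroy */

   printf("final score %d, domain %s\n",
          vbo.score, vbo.domain == DOMAIN_VRAM ? "VRAM" : "GART");
   return 0;
}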