-rw-r--r--   src/gallium/drivers/nvc0/Makefile             |   3
-rw-r--r--   src/gallium/drivers/nvc0/SConscript           |   1
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_buffer.c        | 201
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_context.c       |   2
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_context.h       |   2
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_fence.c         |  57
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_fence.h         |  29
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_graph_macros.h  |   2
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_mm.c            | 245
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_push.c          |   4
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_resource.h      |  46
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_screen.c        |  55
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_screen.h        |  61
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_shader_state.c  |   8
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c    |   6
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_transfer.c      | 119
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_vbo.c           |  97
-rw-r--r--   src/gallium/drivers/nvc0/nvc0_winsys.h        |  16
18 files changed, 806 insertions, 148 deletions
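
Most of the new code in this commit is the nvc0_mm.c slab suballocator: buffer requests are rounded up to a power-of-two order, clamped to a 128-byte minimum chunk (order 7), and served out of per-order slab buckets, while anything above the 1 MiB maximum chunk (order 20) gets a dedicated BO. The following standalone sketch (not part of the patch) mirrors those constants and the mm_get_order()/bucket selection logic to show where a few request sizes would land:

#include <stdint.h>
#include <stdio.h>

#define MM_MIN_ORDER 7    /* smallest chunk: 128 bytes */
#define MM_MAX_ORDER 20   /* largest slab chunk: 1 MiB */

/* Round a request up to its power-of-two order, as mm_get_order() does. */
static int mm_get_order(uint32_t size)
{
   int s = 31 - __builtin_clz(size);   /* floor(log2(size)) */
   if (size > (1u << s))
      s += 1;                          /* round up for non-powers of two */
   return s;
}

int main(void)
{
   const uint32_t sizes[] = { 100, 4096, 3 << 20 };
   for (int i = 0; i < 3; ++i) {
      int order = mm_get_order(sizes[i]);
      if (order > MM_MAX_ORDER)
         printf("%u bytes -> too large for a slab, dedicated BO\n", sizes[i]);
      else if (order < MM_MIN_ORDER)
         printf("%u bytes -> bucket 0 (128-byte chunks)\n", sizes[i]);
      else
         printf("%u bytes -> bucket %d (%u-byte chunks)\n",
                sizes[i], order - MM_MIN_ORDER, 1u << order);
   }
   return 0;
}

With these inputs, 100 bytes lands in bucket 0 (128-byte chunks), 4096 bytes in bucket 5, and a 3 MiB request falls back to its own buffer object.
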
| diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile index 7aefd6f1f3..5c3d46d9ea 100644 --- a/src/gallium/drivers/nvc0/Makefile +++ b/src/gallium/drivers/nvc0/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \  	nvc0_pc_regalloc.c \  	nvc0_push.c \  	nvc0_push2.c \ -	nvc0_fence.c +	nvc0_fence.c \ +	nvc0_mm.c  include ../../Makefile.template diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript index 808d6895f1..ec529d44f5 100644 --- a/src/gallium/drivers/nvc0/SConscript +++ b/src/gallium/drivers/nvc0/SConscript @@ -29,6 +29,7 @@ nvc0 = env.ConvenienceLibrary(          'nvc0_push.c',          'nvc0_push2.c',          'nvc0_fence.c', +        'nvc0_mm'      ])  Export('nvc0') diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index 873016f0d5..93d7f5d303 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -11,46 +11,116 @@  #include "nvc0_context.h"  #include "nvc0_resource.h" +#define NVC0_BUFFER_STATUS_USER_MEMORY 0xff + +static INLINE boolean +nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf, +                     unsigned domain) +{ +   if (domain == NOUVEAU_BO_VRAM) { +      buf->mm = nvc0_mm_allocate(screen->mm_VRAM, buf->base.width0, &buf->bo, +                                 &buf->offset); +      if (!buf->bo) +         return nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART); +   } else +   if (domain == NOUVEAU_BO_GART) { +      buf->mm = nvc0_mm_allocate(screen->mm_GART, buf->base.width0, &buf->bo, +                                 &buf->offset); +      if (!buf->bo) +         return FALSE; +   } else { +      assert(!domain); +      if (!buf->data) +         buf->data = MALLOC(buf->base.width0); +      if (!buf->data) +         return FALSE; +   } +   buf->domain = domain; +   return TRUE; +} + +static INLINE void +release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence) +{ +   (*mm)->next = fence->buffers; +   fence->buffers = (*mm); +   (*mm) = NULL; +} +  static void  nvc0_buffer_destroy(struct pipe_screen *pscreen,                      struct pipe_resource *presource)  { +   struct nvc0_screen *screen = nvc0_screen(pscreen);     struct nvc0_resource *res = nvc0_resource(presource); -   if (res->bo) -      nouveau_screen_bo_release(pscreen, res->bo); +   nouveau_bo_ref(NULL, &res->bo); -   if (res->data) +   if (res->mm) +      release_allocation(&res->mm, screen->fence.current); + +   if (res->status != NVC0_BUFFER_STATUS_USER_MEMORY && res->data)        FREE(res->data);     FREE(res);  } +static INLINE uint32_t +nouveau_buffer_rw_flags(unsigned pipe) +{ +   uint32_t flags = 0; + +   if (pipe & PIPE_TRANSFER_READ) +      flags = NOUVEAU_BO_RD; +   if (pipe & PIPE_TRANSFER_WRITE) +      flags |= NOUVEAU_BO_WR; + +   return flags; +} +  static void *  nvc0_buffer_transfer_map(struct pipe_context *pipe,                           struct pipe_transfer *transfer)  {     struct nvc0_resource *res = nvc0_resource(transfer->resource); +   struct nvc0_fence *fence;     uint8_t *map; -   uint32_t flags; +   int ret; +   uint32_t flags = nouveau_buffer_rw_flags(transfer->usage); -   if (res->base.bind & PIPE_BIND_VERTEX_BUFFER) +   if ((res->base.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) && +       (flags & NOUVEAU_BO_WR))        nvc0_context(pipe)->vbo_dirty = TRUE; -// #ifdef NOUVEAU_USERPSACE_MM -   if (res->base.bind & PIPE_BIND_CONSTANT_BUFFER) +   if (res->domain == 0)       
 return res->data + transfer->box.x; -// #endif -   flags = nouveau_screen_transfer_flags(transfer->usage); -   map = nouveau_screen_bo_map_range(pipe->screen, -                                     res->bo, -                                     transfer->box.x, transfer->box.width, -                                     flags); -   if (!map) +   if (res->domain == NOUVEAU_BO_VRAM) { +      NOUVEAU_ERR("transfers to/from VRAM buffers are not allowed\n"); +      /* if this happens, migrate back to GART */ +      return NULL; +   } + +   if (res->score > -1024) +      --res->score; + +   ret = nouveau_bo_map(res->bo, flags | NOUVEAU_BO_NOSYNC); +   if (ret)        return NULL; +   map = res->bo->map; +   nouveau_bo_unmap(res->bo); + +   fence = (flags == NOUVEAU_BO_RD) ? res->fence_wr : res->fence; + +   if (fence) { +      if (nvc0_fence_wait(fence) == FALSE) +         NOUVEAU_ERR("failed to fence buffer\n"); + +      nvc0_fence_reference(&res->fence, NULL); +      nvc0_fence_reference(&res->fence_wr, NULL); +   } -   return map + transfer->box.x; +   return map + transfer->box.x + res->offset;  } @@ -62,13 +132,12 @@ nvc0_buffer_transfer_flush_region(struct pipe_context *pipe,  {     struct nvc0_resource *res = nvc0_resource(transfer->resource); -#ifdef NOUVEAU_USERPSACE_MM     if (!res->bo)        return; -#endif +     nouveau_screen_bo_map_flush_range(pipe->screen,                                       res->bo, -                                     transfer->box.x + box->x, +                                     res->offset + transfer->box.x + box->x,                                       box->width);  } @@ -78,11 +147,10 @@ nvc0_buffer_transfer_unmap(struct pipe_context *pipe,  {     struct nvc0_resource *res = nvc0_resource(transfer->resource); -// #ifdef NOUVEAU_USERPSACE_MM     if (res->data)        return; -// #endif -   nouveau_screen_bo_unmap(pipe->screen, res->bo); + +   /* nouveau_screen_bo_unmap(pipe->screen, res->bo); */  }  const struct u_resource_vtbl nvc0_buffer_vtbl = @@ -102,7 +170,9 @@ struct pipe_resource *  nvc0_buffer_create(struct pipe_screen *pscreen,                     const struct pipe_resource *templ)  { +   struct nvc0_screen *screen = nvc0_screen(pscreen);     struct nvc0_resource *buffer; +   boolean ret;     buffer = CALLOC_STRUCT(nvc0_resource);     if (!buffer) @@ -114,14 +184,11 @@ nvc0_buffer_create(struct pipe_screen *pscreen,     buffer->base.screen = pscreen;     if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER) -      buffer->data = MALLOC(buffer->base.width0); +      ret = nvc0_buffer_allocate(screen, buffer, 0); +   else +      ret = nvc0_buffer_allocate(screen, buffer, NOUVEAU_BO_GART); -   buffer->bo = nouveau_screen_bo_new(pscreen, -                                      16, -                                      buffer->base.usage, -                                      buffer->base.bind, -                                      buffer->base.width0); -   if (buffer->bo == NULL) +   if (ret == FALSE)        goto fail;     return &buffer->base; @@ -154,13 +221,77 @@ nvc0_user_buffer_create(struct pipe_screen *pscreen,     buffer->base.height0 = 1;     buffer->base.depth0 = 1; -   buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes); -   if (!buffer->bo) -      goto fail; -	 +   buffer->data = ptr; +   buffer->status = NVC0_BUFFER_STATUS_USER_MEMORY; +     return &buffer->base; +} -fail: -   FREE(buffer); -   return NULL; +/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. 
*/ +boolean +nvc0_buffer_migrate(struct nvc0_context *nvc0, +                    struct nvc0_resource *buf, unsigned domain) +{ +   struct nvc0_screen *screen = nvc0_screen(buf->base.screen); +   struct nouveau_bo *bo; +   unsigned size = buf->base.width0; +   int ret; + +   if (domain == NOUVEAU_BO_GART && buf->domain == 0) { +      if (!nvc0_buffer_allocate(screen, buf, domain)) +         return FALSE; +      ret = nouveau_bo_map(buf->bo, NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); +      if (ret) +         return ret; +      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size); +      nouveau_bo_unmap(buf->bo); +   } else +   if (domain == NOUVEAU_BO_VRAM && buf->domain == NOUVEAU_BO_GART) { +      struct nvc0_mm_allocation *mm = buf->mm; + +      bo = buf->bo; +      buf->bo = NULL; +      buf->mm = NULL; +      nvc0_buffer_allocate(screen, buf, domain); + +      nvc0_m2mf_copy_linear(nvc0, buf->bo, 0, NOUVEAU_BO_VRAM, +                            bo, 0, NOUVEAU_BO_GART, buf->base.width0); + +      release_allocation(&mm, screen->fence.current); +      nouveau_bo_ref(NULL, &bo); +   } else +   if (domain == NOUVEAU_BO_VRAM && buf->domain == 0) { +      /* should use a scratch buffer instead here */ +      if (!nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART)) +         return FALSE; +      return nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_VRAM); +   } else +      return -1; + +   buf->domain = domain; + +   return TRUE; +} + +/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART. + * MUST NOT FLUSH THE PUSH BUFFER, we could be in the middle of a method. + */ +boolean +nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size) +{ +   struct nvc0_screen *screen = nvc0_screen(buf->base.screen); +   int ret; + +   assert(buf->data && !buf->domain); + +   if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_GART)) +      return FALSE; +   ret = nouveau_bo_map_range(buf->bo, base + buf->offset, size, +                              NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC); +   if (ret) +      return FALSE; +   memcpy(buf->bo->map, buf->data + base, size); +   nouveau_bo_unmap(buf->bo); + +   return TRUE;  } diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index f718feccd7..a05408a678 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -49,6 +49,8 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags,     if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) {        FIRE_RING(chan); + +      nvc0_screen_fence_next(nvc0->screen);     }  } diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index d0f78a564d..26263309d8 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -118,6 +118,7 @@ struct nvc0_context {     unsigned sample_mask;     boolean vbo_dirty; +   boolean vbo_push_hint;     struct draw_context *draw;  }; @@ -150,6 +151,7 @@ static INLINE void  nvc0_make_buffer_resident(struct nvc0_context *nvc0,                            struct nvc0_resource *res, unsigned flags)  { +   nvc0_resource_validate(res, flags);     nvc0_make_bo_resident(nvc0, res->bo, flags);  } diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index 3587097449..dc2abe45bd 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -30,14 +30,14 @@  boolean  nvc0_screen_fence_new(struct nvc0_screen *screen, struct 
nvc0_fence **fence, -		      boolean emit) +                      boolean emit)  {     *fence = CALLOC_STRUCT(nvc0_fence);     if (!*fence)        return FALSE;     (*fence)->screen = screen; -   pipe_reference_init(&(*fence)->reference, 1); +   (*fence)->ref = 1;     if (emit)        nvc0_fence_emit(*fence); @@ -53,15 +53,15 @@ nvc0_fence_emit(struct nvc0_fence *fence)     fence->sequence = ++screen->fence.sequence; -   assert(!(fence->state & NVC0_FENCE_STATE_EMITTED)); +   assert(fence->state == NVC0_FENCE_STATE_AVAILABLE);     BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);     OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);     OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);     OUT_RING  (chan, fence->sequence); -   OUT_RING  (chan, 0x1000f010); +   OUT_RING  (chan, NVC0_3D_QUERY_GET_FENCE); -   pipe_reference(NULL, &fence->reference); +   ++fence->ref;     if (screen->fence.tail)        screen->fence.tail->next = fence; @@ -95,6 +95,18 @@ nvc0_fence_del(struct nvc0_fence *fence)  }  static void +nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence) +{ +   struct nvc0_mm_allocation *alloc = fence->buffers; + +   while (alloc) { +      struct nvc0_mm_allocation *next = alloc->next; +      nvc0_mm_free(alloc); +      alloc = next; +   }; +} + +static void  nvc0_screen_fence_update(struct nvc0_screen *screen)  {     struct nvc0_fence *fence; @@ -110,10 +122,12 @@ nvc0_screen_fence_update(struct nvc0_screen *screen)        sequence = fence->sequence;        fence->state = NVC0_FENCE_STATE_SIGNALLED; -      if (fence->trigger.func) -         fence->trigger.func(fence->trigger.arg); + +      if (fence->buffers) +         nvc0_fence_trigger_release_buffers(fence);        nvc0_fence_reference(&fence, NULL); +        if (sequence == screen->fence.sequence_ack)           break;     } @@ -122,24 +136,45 @@ nvc0_screen_fence_update(struct nvc0_screen *screen)        screen->fence.tail = NULL;  } +#define NVC0_FENCE_MAX_SPINS (1 << 17) +  boolean  nvc0_fence_wait(struct nvc0_fence *fence)  {     struct nvc0_screen *screen = fence->screen;     int spins = 0; -   if (fence->state != NVC0_FENCE_STATE_EMITTED) -      return TRUE; +   if (fence->state == NVC0_FENCE_STATE_AVAILABLE) { +      nvc0_fence_emit(fence); + +      FIRE_RING(screen->base.channel); + +      if (fence == screen->fence.current) +         nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); +   } +     do {        nvc0_screen_fence_update(screen);        if (fence->state == NVC0_FENCE_STATE_SIGNALLED)           return TRUE; +      spins++;  #ifdef PIPE_OS_UNIX -      if ((spins & 7) == 7) /* spend a few cycles */ +      if (!(spins % 8)) /* donate a few cycles */           sched_yield();  #endif -   } while (++spins < 10000); +   } while (spins < NVC0_FENCE_MAX_SPINS); + +   if (spins > 9000) +      NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence);     return FALSE;  } + +void +nvc0_screen_fence_next(struct nvc0_screen *screen) +{ +   nvc0_fence_emit(screen->fence.current); +   nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); +   nvc0_screen_fence_update(screen); +} diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h index 513ac07c1a..7b31f28808 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.h +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -3,24 +3,21 @@  #define __NVC0_FENCE_H__  #include "util/u_inlines.h" +#include "util/u_double_list.h" -struct nvc0_fence_trigger { -   void (*func)(void *); -   void *arg; -   struct 
nvc0_fence_trigger *next; -}; - +#define NVC0_FENCE_STATE_AVAILABLE 0  #define NVC0_FENCE_STATE_EMITTED   1  #define NVC0_FENCE_STATE_SIGNALLED 2 -/* reference first, so pipe_reference works directly */ +struct nvc0_mm_allocation; +  struct nvc0_fence { -   struct pipe_reference reference;     struct nvc0_fence *next;     struct nvc0_screen *screen;     int state; +   int ref;     uint32_t sequence; -   struct nvc0_fence_trigger trigger; +   struct nvc0_mm_allocation *buffers;  };  void nvc0_fence_emit(struct nvc0_fence *); @@ -31,10 +28,20 @@ boolean nvc0_fence_wait(struct nvc0_fence *);  static INLINE void  nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence)  { -   if (pipe_reference(&(*ref)->reference, &fence->reference)) -      nvc0_fence_del(*ref); +   if (*ref) { +      if (--(*ref)->ref == 0) +         nvc0_fence_del(*ref); +   } +   if (fence) +      ++fence->ref;     *ref = fence;  } +static INLINE struct nvc0_fence * +nvc0_fence(struct pipe_fence_handle *fence) +{ +   return (struct nvc0_fence *)fence; +} +  #endif // __NVC0_FENCE_H__ diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h index b8b89de28e..7db09287ab 100644 --- a/src/gallium/drivers/nvc0/nvc0_graph_macros.h +++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h @@ -63,7 +63,7 @@ static const uint32_t nvc0_9097_vertex_array_select[] =  static const uint32_t nvc0_9097_color_mask_brdc[] =  { -   0x05a00021, /* maddr [0x1680] */ +   0x05a00021, /* maddr [0x1a00, increment = 4] */     0x00000841, /* send $r1 */     0x00000841, /* send $r1 */     0x00000841, /* send $r1 */ diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c new file mode 100644 index 0000000000..e031fb393a --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_mm.c @@ -0,0 +1,245 @@ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +#include "nvc0_screen.h" + +#define MM_MIN_ORDER 7 +#define MM_MAX_ORDER 20 + +#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1) + +#define MM_MIN_SIZE (1 << MM_MIN_ORDER) +#define MM_MAX_SIZE (1 << MM_MAX_ORDER) + +struct mm_bucket { +   struct list_head free; +   struct list_head used; +   struct list_head full; +   int num_free; +}; + +struct nvc0_mman { +   struct nouveau_device *dev; +   struct mm_bucket bucket[MM_NUM_BUCKETS]; +   uint32_t storage_type; +   uint32_t domain; +   uint64_t allocated; +}; + +struct mm_slab { +   struct list_head head; +   struct nouveau_bo *bo; +   struct nvc0_mman *cache; +   int order; +   int count; +   int free; +   uint32_t bits[0]; +}; + +static int +mm_slab_alloc(struct mm_slab *slab) +{ +   int i, n, b; + +   if (slab->free == 0) +      return -1; + +   for (i = 0; i < (slab->count + 31) / 32; ++i) { +      b = ffs(slab->bits[i]) - 1; +      if (b >= 0) { +         n = i * 32 + b; +         assert(n < slab->count); +         slab->free--; +         slab->bits[i] &= ~(1 << b); +         return n; +      } +   } +   return -1; +} + +static INLINE void +mm_slab_free(struct mm_slab *slab, int i) +{ +   assert(i < slab->count); +   slab->bits[i / 32] |= 1 << (i % 32); +   slab->free++; +   assert(slab->free <= slab->count); +} + +static INLINE int +mm_get_order(uint32_t size) +{ +   int s = __builtin_clz(size) ^ 31; + +   if (size > (1 << s)) +      s += 1; +   return s; +} + +static struct mm_bucket * +mm_bucket_by_order(struct nvc0_mman *cache, int order) +{ +   if (order > MM_MAX_ORDER) +      return NULL; +   return 
&cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER]; +} + +static struct mm_bucket * +mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) +{ +   return mm_bucket_by_order(cache, mm_get_order(size)); +} + +/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */ +static INLINE uint32_t +mm_default_slab_size(unsigned chunk_order) +{ +   assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); + +   static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = +   { +      12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 +   }; + +   return 1 << slab_order[chunk_order - MM_MIN_ORDER]; +} + +static int +mm_slab_new(struct nvc0_mman *cache, int chunk_order) +{ +   struct mm_slab *slab; +   int words, ret; +   const uint32_t size = mm_default_slab_size(chunk_order); + +   words = ((size >> chunk_order) + 31) / 32; +   assert(words); + +   slab = MALLOC(sizeof(struct mm_slab) + words * 4); +   if (!slab) +      return PIPE_ERROR_OUT_OF_MEMORY; + +   memset(&slab->bits[0], ~0, words * 4); + +   slab->bo = NULL; +   ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, +                             0, cache->storage_type, &slab->bo); +   if (ret) { +      FREE(slab); +      return PIPE_ERROR_OUT_OF_MEMORY; +   } + +   LIST_INITHEAD(&slab->head); + +   slab->cache = cache; +   slab->order = chunk_order; +   slab->count = slab->free = size >> chunk_order; + +   LIST_ADD(&slab->head, &mm_bucket_by_order(cache, chunk_order)->free); + +   cache->allocated += size; + +   debug_printf("MM: new slab, total memory = %lu KiB\n", +                cache->allocated / 1024); + +   return PIPE_OK; +} + +/* @return token to identify slab or NULL if we just allocated a new bo */ +struct nvc0_mm_allocation * +nvc0_mm_allocate(struct nvc0_mman *cache, +                 uint32_t size, struct nouveau_bo **bo, uint32_t *offset) +{ +   struct mm_bucket *bucket; +   struct mm_slab *slab; +   struct nvc0_mm_allocation *alloc; +   int ret; + +   bucket = mm_bucket_by_size(cache, size); +   if (!bucket) { +      ret = nouveau_bo_new_tile(cache->dev, cache->domain, 0, size, +                                0, cache->storage_type, bo); +      if (ret) +         debug_printf("bo_new(%x, %x): %i\n", size, cache->storage_type, ret); + +      *offset = 0; +      return NULL; +   } + +   if (!LIST_IS_EMPTY(&bucket->used)) { +      slab = LIST_ENTRY(struct mm_slab, bucket->used.next, head); +   } else { +      if (LIST_IS_EMPTY(&bucket->free)) { +         mm_slab_new(cache, MAX2(mm_get_order(size), MM_MIN_ORDER)); +      } +      slab = LIST_ENTRY(struct mm_slab, bucket->free.next, head); + +      LIST_DEL(&slab->head); +      LIST_ADD(&slab->head, &bucket->used); +   } + +   *offset = mm_slab_alloc(slab) << slab->order; + +   alloc = MALLOC_STRUCT(nvc0_mm_allocation); +   if (!alloc) +      return NULL; + +   nouveau_bo_ref(slab->bo, bo); + +   if (slab->free == 0) { +      LIST_DEL(&slab->head); +      LIST_ADD(&slab->head, &bucket->full); +   } + +   alloc->next = NULL; +   alloc->offset = *offset; +   alloc->priv = (void *)slab; + +   return alloc; +} + +void +nvc0_mm_free(struct nvc0_mm_allocation *alloc) +{ +   struct mm_slab *slab = (struct mm_slab *)alloc->priv; +   struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order); + +   mm_slab_free(slab, alloc->offset >> slab->order); + +   if (slab->free == 1) { +      LIST_DEL(&slab->head); + +      if (slab->count > 1) +         LIST_ADDTAIL(&slab->head, &bucket->used); +      else +         
LIST_ADDTAIL(&slab->head, &bucket->free); +   } + +   FREE(alloc); +} + +struct nvc0_mman * +nvc0_mm_create(struct nouveau_device *dev, uint32_t domain, +               uint32_t storage_type) +{ +   struct nvc0_mman *cache = MALLOC_STRUCT(nvc0_mman); +   int i; + +   if (!cache) +      return NULL; + +   cache->dev = dev; +   cache->domain = domain; +   cache->storage_type = storage_type; +   cache->allocated = 0; + +   for (i = 0; i < MM_NUM_BUCKETS; ++i) { +      LIST_INITHEAD(&cache->bucket[i].free); +      LIST_INITHEAD(&cache->bucket[i].used); +      LIST_INITHEAD(&cache->bucket[i].full); +   } + +   return cache; +} + diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 8b8fe610e2..1bdc8e88a7 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -215,9 +215,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)        struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];        struct nvc0_resource *res = nvc0_resource(vb->buffer); -      if (nouveau_bo_map(res->bo, NOUVEAU_BO_RD)) -         return; -      data = (uint8_t *)res->bo->map + vb->buffer_offset; +      data = nvc0_resource_map_offset(res, vb->buffer_offset, NOUVEAU_BO_RD);        if (info->indexed)           data += info->index_bias * vb->stride; diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 9c6895ea81..b9f3f7b5d8 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -29,10 +29,43 @@ struct nvc0_resource {     uint8_t status;     uint8_t domain; + +   int16_t score; /* low if mapped very often, if high can move to VRAM */ +     struct nvc0_fence *fence; -   struct list_head list; +   struct nvc0_fence *fence_wr; + +   struct nvc0_mm_allocation *mm;  }; +/* XXX: wait for fence (atm only using this for vertex push) */ +static INLINE void * +nvc0_resource_map_offset(struct nvc0_resource *res, uint32_t offset, +                         uint32_t flags) +{ +   void *map; + +   if (res->domain == 0) +      return res->data + offset; + +   if (nouveau_bo_map_range(res->bo, res->offset + offset, +                            res->base.width0, flags | NOUVEAU_BO_NOSYNC)) +      return NULL; + +   /* With suballocation, the same bo can be mapped several times, so unmap +    * immediately. Maps are guaranteed to persist. 
*/ +   map = res->bo->map; +   nouveau_bo_unmap(res->bo); +   return map; +} + +static INLINE void +nvc0_resource_unmap(struct nvc0_resource *res) +{ +   if (res->domain != 0 && 0) +      nouveau_bo_unmap(res->bo); +} +  #define NVC0_TILE_H(m) (8 << ((m >> 4) & 0xf))  #define NVC0_TILE_D(m) (1 << (m >> 8)) @@ -67,7 +100,7 @@ nvc0_resource(struct pipe_resource *resource)  static INLINE boolean  nvc0_resource_mapped_by_gpu(struct pipe_resource *resource)  { -   return nvc0_resource(resource)->bo->offset != 0ULL; +   return nvc0_resource(resource)->domain != 0;  }  void @@ -106,4 +139,13 @@ nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,  void  nvc0_miptree_surface_del(struct pipe_surface *ps); +struct nvc0_context; + +boolean +nvc0_buffer_migrate(struct nvc0_context *, +                    struct nvc0_resource *, unsigned domain); + +boolean +nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size); +  #endif diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 616a990337..107d50d7f2 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -229,12 +229,28 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,     return pos + size;  } +static void +nvc0_screen_fence_reference(struct pipe_screen *pscreen, +                            struct pipe_fence_handle **ptr, +                            struct pipe_fence_handle *fence) +{ +   nvc0_fence_reference((struct nvc0_fence **)ptr, nvc0_fence(fence)); +} + +static int +nvc0_screen_fence_signalled(struct pipe_screen *pscreen, +                            struct pipe_fence_handle *fence, +                            unsigned flags) +{ +   return !(((struct nvc0_fence *)fence)->state == NVC0_FENCE_STATE_SIGNALLED); +} +  static int  nvc0_screen_fence_finish(struct pipe_screen *pscreen, -                         struct pipe_fence_handle *pfence, +                         struct pipe_fence_handle *fence,                           unsigned flags)  { -   return nvc0_fence_wait((struct nvc0_fence *)pfence) != TRUE; +   return nvc0_fence_wait((struct nvc0_fence *)fence) != TRUE;  }  static void @@ -339,6 +355,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)     pscreen->get_param = nvc0_screen_get_param;     pscreen->get_shader_param = nvc0_screen_get_shader_param;     pscreen->get_paramf = nvc0_screen_get_paramf; +   pscreen->fence_reference = nvc0_screen_fence_reference; +   pscreen->fence_signalled = nvc0_screen_fence_signalled;     pscreen->fence_finish = nvc0_screen_fence_finish;     nvc0_screen_init_resource_functions(pscreen); @@ -353,6 +371,18 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)     screen->fence.map = screen->fence.bo->map;     nouveau_bo_unmap(screen->fence.bo); +   for (i = 0; i < NVC0_SCRATCH_NR_BUFFERS; ++i) { +      ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, NVC0_SCRATCH_SIZE, +                           &screen->scratch.bo[i]); +      if (ret) +         goto fail; +   } + +   for (i = 0; i < 8; ++i) { +      BEGIN_RING(chan, (i << 13) | (0x0000 >> 2), 1); +      OUT_RING  (chan, 0x0000); +   } +     BEGIN_RING(chan, RING_MF_(0x0000), 1);     OUT_RING  (chan, 0x9039);     BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3); @@ -510,15 +540,11 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)     BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1);     OUT_RING  (chan, 1); -   // 
BEGIN_RING(chan, RING_3D(GP_SELECT), 1); -   // OUT_RING  (chan, 0x40); -   BEGIN_RING(chan, RING_3D(SP_SELECT(4)), 1); +   BEGIN_RING(chan, RING_3D(GP_SELECT), 1);     OUT_RING  (chan, 0x40);     BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1);     OUT_RING  (chan, 0); -   // BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); -   // OUT_RING  (chan, 0x30); -   BEGIN_RING(chan, RING_3D(SP_SELECT(3)), 1); +   BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);     OUT_RING  (chan, 0x30);     BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1);     OUT_RING  (chan, 3); @@ -538,18 +564,19 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)     BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);     OUT_RING  (chan, 0xab);     OUT_RING  (chan, 0x00000000); -   BEGIN_RING(chan, RING_3D_(0x07e8), 2); -   OUT_RING  (chan, 0xac); -   OUT_RING  (chan, 0x00000000); -   BEGIN_RING(chan, RING_3D_(0x07f0), 2); -   OUT_RING  (chan, 0xac); -   OUT_RING  (chan, 0x00000000);     FIRE_RING (chan);     screen->tic.entries = CALLOC(4096, sizeof(void *));     screen->tsc.entries = screen->tic.entries + 2048; +   screen->mm_GART = nvc0_mm_create(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, +                                    0x000); +   screen->mm_VRAM = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0x000); +   screen->mm_VRAM_fe0 = nvc0_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0); + +   nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); +     return pscreen;  fail: diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 989e183bde..12cea658a5 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -10,9 +10,13 @@  #define NVC0_TIC_MAX_ENTRIES 2048  #define NVC0_TSC_MAX_ENTRIES 2048 +struct nvc0_mman;  struct nvc0_context;  struct nvc0_fence; +#define NVC0_SCRATCH_SIZE (2 << 20) +#define NVC0_SCRATCH_NR_BUFFERS 2 +  struct nvc0_screen {     struct nouveau_screen base;     struct nouveau_winsys *nvws; @@ -30,6 +34,13 @@ struct nvc0_screen {     struct nouveau_resource *text_heap;     struct { +      struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS]; +      uint8_t *buf; +      int index; +      uint32_t offset; +   } scratch; + +   struct {        void **entries;        int next;        uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32]; @@ -50,6 +61,10 @@ struct nvc0_screen {        uint32_t sequence_ack;        struct nouveau_bo *bo;     } fence; + +   struct nvc0_mman *mm_GART; +   struct nvc0_mman *mm_VRAM; +   struct nvc0_mman *mm_VRAM_fe0;  };  static INLINE struct nvc0_screen * @@ -58,14 +73,60 @@ nvc0_screen(struct pipe_screen *screen)     return (struct nvc0_screen *)screen;  } +/* Since a resource can be migrated, we need to decouple allocations from + * them. This struct is linked with fences for delayed freeing of allocs. 
+ */ +struct nvc0_mm_allocation { +   struct nvc0_mm_allocation *next; +   void *priv; +   uint32_t offset; +}; + +extern struct nvc0_mman * +nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type); + +extern struct nvc0_mm_allocation * +nvc0_mm_allocate(struct nvc0_mman *, +                 uint32_t size, struct nouveau_bo **, uint32_t *offset); +extern void +nvc0_mm_free(struct nvc0_mm_allocation *); +  void nvc0_screen_make_buffers_resident(struct nvc0_screen *);  int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);  int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); +static INLINE void +nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) +{ +   struct nvc0_screen *screen = nvc0_screen(res->base.screen); + +   assert(res->mm); + +   nvc0_fence_reference(&res->fence, screen->fence.current); + +   if (flags & NOUVEAU_BO_WR) +      nvc0_fence_reference(&res->fence_wr, screen->fence.current); + +   nouveau_reloc_emit(screen->base.channel, +                      NULL, 0, NULL, res->bo, 0, 0, NOUVEAU_BO_RDWR, 0, 0); +} + +  boolean  nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit); +void +nvc0_screen_fence_next(struct nvc0_screen *); + +static INLINE boolean +nvc0_screen_fence_emit(struct nvc0_screen *screen) +{ +   nvc0_fence_emit(screen->fence.current); + +   return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); +} +  struct nvc0_format {     uint32_t rt;     uint32_t tic; diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 932848ea94..25a9ef722b 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -76,10 +76,10 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)     BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1);     OUT_RING  (chan, vp->max_gpr); -   BEGIN_RING(chan, RING_3D_(0x163c), 1); -   OUT_RING  (chan, 0); -   BEGIN_RING(chan, RING_3D_(0x2600), 1); -   OUT_RING  (chan, 1); +   // BEGIN_RING(chan, RING_3D_(0x163c), 1); +   // OUT_RING  (chan, 0); +   // BEGIN_RING(chan, RING_3D_(0x2600), 1); +   // OUT_RING  (chan, 1);  }  void diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 1b161f66dc..a6797db9c5 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -22,6 +22,8 @@  #include <unistd.h> +#define NOUVEAU_DEBUG 1 +  #include "pipe/p_shader_tokens.h"  #include "tgsi/tgsi_parse.h"  #include "tgsi/tgsi_util.h" @@ -194,7 +196,7 @@ static INLINE void  bld_warn_uninitialized(struct bld_context *bld, int kind,                         struct bld_register *reg, struct nv_basic_block *b)  { -#ifdef NOUVEAU_DEBUG_BITS +#ifdef NOUVEAU_DEBUG     long i = (reg - &bld->tvs[0][0]) / 4;     long c = (reg - &bld->tvs[0][0]) & 3; @@ -1359,7 +1361,7 @@ bld_instruction(struct bld_context *bld,     uint opcode = translate_opcode(insn->Instruction.Opcode);     uint8_t mask = insn->Dst[0].Register.WriteMask; -#ifdef NOUVEAU_DEBUG_BITS +#ifdef NOUVEAU_DEBUG     debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);  #endif diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index adf9192943..56c5fe12c2 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -111,6 +111,8 @@ nvc0_m2mf_push_linear(struct nvc0_context *nvc0,     uint32_t *src = (uint32_t *)data;     unsigned count = (size + 
3) / 4; +   MARK_RING (chan, 8, 2); +     BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);     OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR);     OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR); @@ -125,6 +127,7 @@ nvc0_m2mf_push_linear(struct nvc0_context *nvc0,        if (nr < 9) {           FIRE_RING(chan); +         nvc0_make_bo_resident(nvc0, dst, NOUVEAU_BO_WR);           continue;        }        nr = MIN2(count, nr - 1); @@ -138,53 +141,90 @@ nvc0_m2mf_push_linear(struct nvc0_context *nvc0,     }  } +void +nvc0_m2mf_copy_linear(struct nvc0_context *nvc0, +                      struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, +                      struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, +                      unsigned size) +{ +   struct nouveau_channel *chan = nvc0->screen->base.channel; + +   while (size) { +      unsigned bytes = MIN2(size, 1 << 17); + +      MARK_RING (chan, 11, 4); + +      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); +      OUT_RELOCh(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); +      OUT_RELOCl(chan, dst, dstoff, dstdom | NOUVEAU_BO_WR); +      BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2); +      OUT_RELOCh(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); +      OUT_RELOCl(chan, src, srcoff, srcdom | NOUVEAU_BO_RD); +      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); +      OUT_RING  (chan, bytes); +      OUT_RING  (chan, 1); +      BEGIN_RING(chan, RING_MF(EXEC), 1); +      OUT_RING  (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | +                 NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT); + +      srcoff += bytes; +      dstoff += bytes; +      size -= bytes; +   } +} +  static void -nvc0_sifc_push_rect(struct pipe_screen *pscreen, -                    const struct nvc0_m2mf_rect *dst, unsigned dst_format, -                    unsigned src_format, unsigned src_pitch, void *src, +nvc0_m2mf_push_rect(struct pipe_screen *pscreen, +                    const struct nvc0_m2mf_rect *dst, +                    const void *data,                      unsigned nblocksx, unsigned nblocksy)  {     struct nouveau_channel *chan; +   const uint8_t *src = (const uint8_t *)data; +   const int cpp = dst->cpp; +   const int line_len = nblocksx * cpp; +   int dy = dst->y; -   if (dst->bo->tile_flags) { -      BEGIN_RING(chan, RING_2D(DST_FORMAT), 5); -      OUT_RING  (chan, dst_format); -      OUT_RING  (chan, 0); -      OUT_RING  (chan, dst->tile_mode); -      OUT_RING  (chan, 1); -      OUT_RING  (chan, 0); -   } else { -      BEGIN_RING(chan, RING_2D(DST_FORMAT), 2); -      OUT_RING  (chan, NV50_SURFACE_FORMAT_A8R8G8B8_UNORM); -      OUT_RING  (chan, 1); -      BEGIN_RING(chan, RING_2D(DST_PITCH), 1); -      OUT_RING  (chan, dst->pitch); -   } +   assert(dst->bo->tile_flags); -   BEGIN_RING(chan, RING_2D(DST_WIDTH), 4); -   OUT_RING  (chan, dst->width); +   BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5); +   OUT_RING  (chan, dst->tile_mode); +   OUT_RING  (chan, dst->width * cpp);     OUT_RING  (chan, dst->height); -   OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); -   OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); - -   BEGIN_RING(chan, RING_2D(SIFC_BITMAP_ENABLE), 2); -   OUT_RING  (chan, 0); -   OUT_RING  (chan, src_format); -   BEGIN_RING(chan, RING_2D(SIFC_WIDTH), 10); -   OUT_RING  (chan, nblocksx); -   OUT_RING  (chan, nblocksy); -   OUT_RING  (chan, 0); -   OUT_RING  (chan, 1); -   OUT_RING  (chan, 0); -   OUT_RING  (chan, 1); -   OUT_RING  (chan, 0); -   OUT_RING  (chan, dst->x); -   
OUT_RING  (chan, 0); -   OUT_RING  (chan, dst->y); +   OUT_RING  (chan, dst->depth); +   OUT_RING  (chan, dst->z);     while (nblocksy) { +      int line_count, words; +      int size = MIN2(AVAIL_RING(chan), NV04_PFIFO_MAX_PACKET_LEN); + +      if (size < (12 + words)) { +         FIRE_RING(chan); +         continue; +      } +      line_count = (size * 4) / line_len; +      words = (line_count * line_len + 3) / 4; -      src = (uint8_t *)src + src_pitch; +      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2); +      OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); +      OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR); + +      BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2); +      OUT_RING  (chan, dst->x * cpp); +      OUT_RING  (chan, dy); +      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2); +      OUT_RING  (chan, line_len); +      OUT_RING  (chan, line_count); +      BEGIN_RING(chan, RING_MF(EXEC), 1); +      OUT_RING  (chan, (1 << NVC0_M2MF_EXEC_INC__SHIFT) | +                 NVC0_M2MF_EXEC_PUSH | NVC0_M2MF_EXEC_LINEAR_IN); + +      BEGIN_RING(chan, RING_MF(DATA), words); +      OUT_RINGp (chan, src, words); + +      dy += line_count; +      src += line_len * line_count; +      nblocksy -= line_count;     }  } @@ -242,6 +282,11 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,     tx->rect[0].pitch = lvl->pitch;     tx->rect[0].domain = NOUVEAU_BO_VRAM; +   if (!(usage & PIPE_TRANSFER_READ) && +       (res->depth0 == 1) && (tx->nblocksy * tx->base.stride < 512 * 4)) { +      /* don't allocate scratch buffer, upload through FIFO */ +   } +     ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,                          tx->nblocksy * tx->base.stride, &tx->rect[1].bo);     if (ret) { diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index b8529e632d..84951ed945 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -131,8 +131,16 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)        ve = &vertex->element[i];        vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; -      if (!nvc0_resource_mapped_by_gpu(vb->buffer)) -         nvc0->vbo_fifo |= 1 << i; +      if (!nvc0_resource_mapped_by_gpu(vb->buffer)) { +         if (nvc0->vbo_push_hint) { +            nvc0->vbo_fifo |= 1 << i; +         } else { +            nvc0_migrate_vertices(nvc0_resource(vb->buffer), +                                  vb->buffer_offset, +                                  vb->buffer->width0 - vb->buffer_offset); +            nvc0->vbo_dirty = TRUE; +         } +      }        if (1 || likely(vb->stride)) {           OUT_RING(chan, ve->state); @@ -142,7 +150,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)     }     for (i = 0; i < vertex->num_elements; ++i) { -      struct nouveau_bo *bo; +      struct nvc0_resource *res;        unsigned size, offset;        ve = &vertex->element[i]; @@ -158,7 +166,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)           continue;        } -      bo = nvc0_resource(vb->buffer)->bo; +      res = nvc0_resource(vb->buffer);        size = vb->buffer->width0;        offset = ve->pipe.src_offset + vb->buffer_offset; @@ -173,17 +181,16 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)           INLIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);        } -      nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, -                               nvc0_resource(vb->buffer), NOUVEAU_BO_RD); +      
nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX, res, NOUVEAU_BO_RD);        BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);        OUT_RING  (chan, (1 << 12) | vb->stride);        BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);        OUT_RING  (chan, i); -      OUT_RELOCh(chan, bo, size, NOUVEAU_BO_GART | NOUVEAU_BO_RD); -      OUT_RELOCl(chan, bo, size, NOUVEAU_BO_GART | NOUVEAU_BO_RD); -      OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); -      OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); +      OUT_RESRCh(chan, res, size, NOUVEAU_BO_RD); +      OUT_RESRCl(chan, res, size, NOUVEAU_BO_RD); +      OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); +      OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD);     }     for (; i < nvc0->state.num_vtxelts; ++i) {        BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1); @@ -231,8 +238,6 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan)     struct nvc0_context *nvc0 = chan->user_private;     nvc0_bufctx_emit_relocs(nvc0); - -   debug_printf("%s(%p)\n", __FUNCTION__, nvc0);  }  #if 0 @@ -325,7 +330,7 @@ nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,        count &= ~3;     }     while (count) { -      unsigned i, nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN & ~3) * 4) / 4; +      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;        BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr);        for (i = 0; i < nr; ++i) { @@ -333,7 +338,7 @@ nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,                    (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);           map += 4;        } -      count -= nr; +      count -= nr * 4;     }  } @@ -349,14 +354,14 @@ nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map,        OUT_RING  (chan, *map++);     }     while (count) { -      unsigned i, nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN & ~1) * 2) / 2; +      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;        BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);        for (i = 0; i < nr; ++i) {           OUT_RING(chan, (map[1] << 16) | map[0]);           map += 2;        } -      count -= nr; +      count -= nr * 2;     }  } @@ -367,18 +372,41 @@ nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map,     map += start;     while (count) { -      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); +      const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);        BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), nr); -      for (i = 0; i < nr; ++i) -         OUT_RING(chan, *map++); +      OUT_RINGp    (chan, map, nr); +      map += nr;        count -= nr;     }  }  static void -nvc0_draw_elements(struct nvc0_context *nvc0, +nvc0_draw_elements_inline_u32_short(struct nouveau_channel *chan, uint32_t *map, +                                    unsigned start, unsigned count) +{ +   map += start; + +   if (count & 1) { +      count--; +      BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1); +      OUT_RING  (chan, *map++); +   } +   while (count) { +      unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2; + +      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr); +      for (i = 0; i < nr; ++i) { +         OUT_RING(chan, (map[1] << 16) | map[0]); +         map += 2; +      } +      count -= nr * 2; +   } +} + +static void +nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,                     unsigned mode, unsigned start, unsigned count,               
      unsigned instance_count, int32_t index_bias)  { @@ -400,7 +428,7 @@ nvc0_draw_elements(struct nvc0_context *nvc0,     }     if (nvc0_resource_mapped_by_gpu(nvc0->idxbuf.buffer)) { -      struct nouveau_bo *bo = nvc0_resource(nvc0->idxbuf.buffer)->bo; +      struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);        unsigned offset = nvc0->idxbuf.offset;        unsigned limit = nvc0->idxbuf.buffer->width0 - 1; @@ -415,10 +443,10 @@ nvc0_draw_elements(struct nvc0_context *nvc0,           BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);           OUT_RING  (chan, mode);           BEGIN_RING(chan, RING_3D(INDEX_ARRAY_START_HIGH), 7); -         OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); -         OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); -         OUT_RELOCh(chan, bo, limit, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); -         OUT_RELOCl(chan, bo, limit, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); +         OUT_RESRCh(chan, res, offset, NOUVEAU_BO_RD); +         OUT_RESRCl(chan, res, offset, NOUVEAU_BO_RD); +         OUT_RESRCh(chan, res, limit, NOUVEAU_BO_RD); +         OUT_RESRCl(chan, res, limit, NOUVEAU_BO_RD);           OUT_RING  (chan, index_size);           OUT_RING  (chan, start);           OUT_RING  (chan, count); @@ -443,7 +471,10 @@ nvc0_draw_elements(struct nvc0_context *nvc0,              nvc0_draw_elements_inline_u16(chan, data, start, count);              break;           case 4: -            nvc0_draw_elements_inline_u32(chan, data, start, count); +            if (shorten) +               nvc0_draw_elements_inline_u32_short(chan, data, start, count); +            else +               nvc0_draw_elements_inline_u32(chan, data, start, count);              break;           default:              assert(0); @@ -464,6 +495,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)     struct nvc0_context *nvc0 = nvc0_context(pipe);     struct nouveau_channel *chan = nvc0->screen->base.channel; +   /* For picking only a few vertices from a large user buffer, push is better, +    * if index count is larger and we expect repeated vertices, suggest upload. 
+    */ +   nvc0->vbo_push_hint = /* the 64 is heuristic */ +      !(info->indexed && +        ((info->max_index - info->min_index + 64) < info->count)); +     nvc0_state_validate(nvc0);     if (nvc0->state.instance_base != info->start_instance) { @@ -488,6 +526,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)                         info->mode, info->start, info->count,                         info->instance_count);     } else { +      boolean shorten = info->max_index <= 65535; +        assert(nvc0->idxbuf.buffer);        if (info->primitive_restart != nvc0->state.prim_restart) { @@ -495,6 +535,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)              BEGIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 2);              OUT_RING  (chan, 1);              OUT_RING  (chan, info->restart_index); + +            if (info->restart_index > 65535) +               shorten = FALSE;           } else {              INLIN_RING(chan, RING_3D(PRIM_RESTART_ENABLE), 0);           } @@ -505,7 +548,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)           OUT_RING  (chan, info->restart_index);        } -      nvc0_draw_elements(nvc0, +      nvc0_draw_elements(nvc0, shorten,                           info->mode, info->start, info->count,                           info->instance_count, info->index_bias);     } diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h index 48ea87613b..e97ca8e90d 100644 --- a/src/gallium/drivers/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -12,6 +12,8 @@  #include "nouveau/nouveau_resource.h"  #include "nouveau/nouveau_reloc.h" +#include "nvc0_resource.h" /* OUT_RESRC */ +  #ifndef NV04_PFIFO_MAX_PACKET_LEN  #define NV04_PFIFO_MAX_PACKET_LEN 2047  #endif @@ -143,6 +145,20 @@ OUT_RELOCh(struct nouveau_channel *chan, struct nouveau_bo *bo,     return OUT_RELOC(chan, bo, delta, flags | NOUVEAU_BO_HIGH, 0, 0);  } +static INLINE int +OUT_RESRCh(struct nouveau_channel *chan, struct nvc0_resource *res, +           unsigned delta, unsigned flags) +{ +   return OUT_RELOCh(chan, res->bo, res->offset + delta, res->domain | flags); +} + +static INLINE int +OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, +           unsigned delta, unsigned flags) +{ +   return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); +} +  static INLINE void  FIRE_RING(struct nouveau_channel *chan)  { | 
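
The central lifecycle change above is that suballocations are never freed while the GPU may still reference them: release_allocation() chains the nvc0_mm_allocation onto the currently open fence, and nvc0_fence_trigger_release_buffers() walks that chain once the fence has signalled. A simplified standalone model of that deferred-release cycle (struct and function names here are illustrative, not the driver's):

#include <stdlib.h>

struct alloc {                     /* stands in for nvc0_mm_allocation */
   struct alloc *next;
   /* bo / offset / priv omitted */
};

struct fence {                     /* stands in for nvc0_fence */
   unsigned sequence;
   struct alloc *buffers;          /* allocations parked until signal */
};

/* Same pattern as release_allocation(): park the allocation on the
 * current fence instead of freeing it immediately. */
static void
defer_free(struct fence *current, struct alloc **a)
{
   (*a)->next = current->buffers;
   current->buffers = *a;
   *a = NULL;
}

/* Same pattern as nvc0_fence_trigger_release_buffers(): once the GPU
 * has passed the fence, the parked allocations can really be freed. */
static void
on_fence_signalled(struct fence *f)
{
   struct alloc *a = f->buffers;
   while (a) {
      struct alloc *next = a->next;
      free(a);                     /* the driver calls nvc0_mm_free() here */
      a = next;
   }
   f->buffers = NULL;
}
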
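nvc0_draw_vbo() also gains a "shorten" path for inline index pushing: when every index, including the primitive-restart marker, fits in 16 bits, the 32-bit client indices are packed two per dword and sent as VB_ELEMENT_U16, halving push-buffer traffic. A hypothetical helper summarising the checks the patch performs inline (assuming gallium's pipe/p_compiler.h and pipe/p_state.h):

#include "pipe/p_compiler.h"   /* boolean, TRUE, FALSE, INLINE */
#include "pipe/p_state.h"      /* struct pipe_draw_info */

/* Illustrative only: mirrors the "shorten" decision made inline in
 * nvc0_draw_vbo()/nvc0_draw_elements() above. */
static INLINE boolean
can_shorten_u32_indices(const struct pipe_draw_info *info)
{
   if (info->max_index > 65535)
      return FALSE;   /* some index would not survive 16-bit narrowing */
   if (info->primitive_restart && info->restart_index > 65535)
      return FALSE;   /* the restart marker must stay representable too */
   return TRUE;
}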
