diff options
Diffstat (limited to 'src/gallium/winsys/r600/drm/radeon_ctx.c')
-rw-r--r-- | src/gallium/winsys/r600/drm/radeon_ctx.c | 313 |
1 files changed, 138 insertions, 175 deletions
diff --git a/src/gallium/winsys/r600/drm/radeon_ctx.c b/src/gallium/winsys/r600/drm/radeon_ctx.c index bd050c4cf9..b8ba9b552d 100644 --- a/src/gallium/winsys/r600/drm/radeon_ctx.c +++ b/src/gallium/winsys/r600/drm/radeon_ctx.c @@ -26,54 +26,40 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> #include "radeon_priv.h" #include "radeon_drm.h" #include "bof.h" -int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo) +static int radeon_ctx_set_bo_new(struct radeon_ctx *ctx, struct radeon_bo *bo, unsigned state_id) { - void *ptr; - - ptr = realloc(ctx->bo, sizeof(struct radeon_bo) * (ctx->nbo + 1)); - if (ptr == NULL) { - return -ENOMEM; - } - ctx->bo = ptr; - ctx->bo[ctx->nbo] = bo; + ctx->bo[ctx->nbo].bo = bo; + ctx->bo[ctx->nbo].bo_flushed = 0; + ctx->bo[ctx->nbo].state_id = state_id; ctx->nbo++; return 0; } -struct radeon_bo *radeon_ctx_get_bo(struct radeon_ctx *ctx, unsigned reloc) +void radeon_ctx_clear(struct radeon_ctx *ctx) { - struct radeon_cs_reloc *greloc; unsigned i; - greloc = (void *)(((u8 *)ctx->reloc) + reloc * 4); - for (i = 0; i < ctx->nbo; i++) { - if (ctx->bo[i]->handle == greloc->handle) { - return radeon_bo_incref(ctx->radeon, ctx->bo[i]); - } + /* FIXME somethings is wrong, it should be safe to + * delete bo here, kernel should postpone bo deletion + * until bo is no longer referenced by cs (through the + * fence association) + */ + for (i = 0; i < 50; i++) { + usleep(10); } - fprintf(stderr, "%s no bo for reloc[%d 0x%08X] %d\n", __func__, reloc, greloc->handle, ctx->nbo); - return NULL; -} - -void radeon_ctx_get_placement(struct radeon_ctx *ctx, unsigned reloc, u32 *placement) -{ - struct radeon_cs_reloc *greloc; - unsigned i; - - placement[0] = 0; - placement[1] = 0; - greloc = (void *)(((u8 *)ctx->reloc) + reloc * 4); for (i = 0; i < ctx->nbo; i++) { - if (ctx->bo[i]->handle == greloc->handle) { - placement[0] = greloc->read_domain | greloc->write_domain; - placement[1] = placement[0]; - return; - } + ctx->bo[i].bo = radeon_bo_decref(ctx->radeon, ctx->bo[i].bo); } + ctx->id = 0; + ctx->npm4 = RADEON_CTX_MAX_PM4; + ctx->nreloc = 0; + ctx->nbo = 0; + memset(ctx->state_crc32, 0, ctx->radeon->nstate * 4); } struct radeon_ctx *radeon_ctx(struct radeon *radeon) @@ -86,6 +72,25 @@ struct radeon_ctx *radeon_ctx(struct radeon *radeon) if (ctx == NULL) return NULL; ctx->radeon = radeon_incref(radeon); + ctx->max_bo = 4096; + ctx->max_reloc = 4096; + ctx->pm4 = malloc(RADEON_CTX_MAX_PM4 * 4); + if (ctx->pm4 == NULL) { + return radeon_ctx_decref(ctx); + } + ctx->state_crc32 = malloc(ctx->radeon->nstate * 4); + if (ctx->state_crc32 == NULL) { + return radeon_ctx_decref(ctx); + } + ctx->bo = malloc(ctx->max_bo * sizeof(struct radeon_ctx_bo)); + if (ctx->bo == NULL) { + return radeon_ctx_decref(ctx); + } + ctx->reloc = malloc(ctx->max_reloc * sizeof(struct radeon_cs_reloc)); + if (ctx->reloc == NULL) { + return radeon_ctx_decref(ctx); + } + radeon_ctx_clear(ctx); return ctx; } @@ -97,31 +102,33 @@ struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx) struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx) { - unsigned i; - if (ctx == NULL) return NULL; if (--ctx->refcount > 0) { return NULL; } - for (i = 0; i < ctx->ndraw; i++) { - ctx->draw[i] = radeon_draw_decref(ctx->draw[i]); - } - for (i = 0; i < ctx->nbo; i++) { - ctx->bo[i] = radeon_bo_decref(ctx->radeon, ctx->bo[i]); - } ctx->radeon = radeon_decref(ctx->radeon); - free(ctx->state); - free(ctx->draw); free(ctx->bo); free(ctx->pm4); free(ctx->reloc); + free(ctx->state_crc32); memset(ctx, 0, sizeof(*ctx)); free(ctx); return NULL; } +static int radeon_ctx_bo_id(struct radeon_ctx *ctx, struct radeon_bo *bo) +{ + unsigned i; + + for (i = 0; i < ctx->nbo; i++) { + if (bo == ctx->bo[i].bo) + return i; + } + return -1; +} + static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *state) { unsigned i, j; @@ -131,12 +138,15 @@ static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *stat return 0; for (i = 0; i < state->nbo; i++) { for (j = 0; j < ctx->nbo; j++) { - if (state->bo[i] == ctx->bo[j]) + if (state->bo[i] == ctx->bo[j].bo) break; } if (j == ctx->nbo) { + if (ctx->nbo >= ctx->max_bo) { + return -EBUSY; + } radeon_bo_incref(ctx->radeon, state->bo[i]); - r = radeon_ctx_set_bo_new(ctx, state->bo[i]); + r = radeon_ctx_set_bo_new(ctx, state->bo[i], state->id); if (r) return r; } @@ -144,7 +154,6 @@ static int radeon_ctx_state_bo(struct radeon_ctx *ctx, struct radeon_state *stat return 0; } - int radeon_ctx_submit(struct radeon_ctx *ctx) { struct drm_radeon_cs drmib; @@ -152,17 +161,17 @@ int radeon_ctx_submit(struct radeon_ctx *ctx) uint64_t chunk_array[2]; int r = 0; - if (!ctx->cpm4) + if (!ctx->id) return 0; #if 0 - for (r = 0; r < ctx->cpm4; r++) { + for (r = 0; r < ctx->id; r++) { fprintf(stderr, "0x%08X\n", ctx->pm4[r]); } #endif drmib.num_chunks = 2; drmib.chunks = (uint64_t)(uintptr_t)chunk_array; chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - chunks[0].length_dw = ctx->cpm4; + chunks[0].length_dw = ctx->id; chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; chunks[1].length_dw = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4; @@ -176,11 +185,10 @@ int radeon_ctx_submit(struct radeon_ctx *ctx) return r; } -static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, +int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, unsigned id, unsigned *placement) { unsigned i; - struct radeon_cs_reloc *ptr; for (i = 0; i < ctx->nreloc; i++) { if (ctx->reloc[i].handle == bo->handle) { @@ -188,14 +196,13 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, return 0; } } - ptr = realloc(ctx->reloc, sizeof(struct radeon_cs_reloc) * (ctx->nreloc + 1)); - if (ptr == NULL) - return -ENOMEM; - ctx->reloc = ptr; - ptr[ctx->nreloc].handle = bo->handle; - ptr[ctx->nreloc].read_domain = placement[0] | placement [1]; - ptr[ctx->nreloc].write_domain = placement[0] | placement [1]; - ptr[ctx->nreloc].flags = 0; + if (ctx->nreloc >= ctx->max_reloc) { + return -EBUSY; + } + ctx->reloc[ctx->nreloc].handle = bo->handle; + ctx->reloc[ctx->nreloc].read_domain = placement[0] | placement [1]; + ctx->reloc[ctx->nreloc].write_domain = placement[0] | placement [1]; + ctx->reloc[ctx->nreloc].flags = 0; ctx->pm4[id] = ctx->nreloc * sizeof(struct radeon_cs_reloc) / 4; ctx->nreloc++; return 0; @@ -203,75 +210,90 @@ static int radeon_ctx_reloc(struct radeon_ctx *ctx, struct radeon_bo *bo, static int radeon_ctx_state_schedule(struct radeon_ctx *ctx, struct radeon_state *state) { - unsigned i, rid, bid, cid; - int r; + unsigned i, rid, cid; + u32 flags; + int r, bo_id[4]; if (state == NULL) return 0; - memcpy(&ctx->pm4[ctx->id], state->pm4, state->cpm4 * 4); - for (i = 0; i < state->nreloc; i++) { + for (i = 0; i < state->nbo; i++) { + bo_id[i] = radeon_ctx_bo_id(ctx, state->bo[i]); + if (bo_id[i] < 0) { + return -EINVAL; + } + flags = (~ctx->bo[bo_id[i]].bo_flushed) & ctx->radeon->type[state->id].flush_flags; + if (flags) { + r = ctx->radeon->bo_flush(ctx, state->bo[i], flags, &state->placement[i * 2]); + if (r) { + return r; + } + } + ctx->bo[bo_id[i]].bo_flushed |= ctx->radeon->type[state->id].flush_flags; + } + if ((ctx->radeon->type[state->id].header_cpm4 + state->cpm4) > ctx->npm4) { + /* need to flush */ + return -EBUSY; + } + memcpy(&ctx->pm4[ctx->id], ctx->radeon->type[state->id].header_pm4, ctx->radeon->type[state->id].header_cpm4 * 4); + ctx->id += ctx->radeon->type[state->id].header_cpm4; + ctx->npm4 -= ctx->radeon->type[state->id].header_cpm4; + memcpy(&ctx->pm4[ctx->id], state->states, state->cpm4 * 4); + for (i = 0; i < state->nbo; i++) { rid = state->reloc_pm4_id[i]; - bid = state->reloc_bo_id[i]; cid = ctx->id + rid; - r = radeon_ctx_reloc(ctx, state->bo[bid], cid, - &state->placement[bid * 2]); + r = radeon_ctx_reloc(ctx, state->bo[i], cid, + &state->placement[i * 2]); if (r) { - fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->type); + fprintf(stderr, "%s state %d failed to reloc\n", __func__, state->id); return r; } } ctx->id += state->cpm4; + ctx->npm4 -= state->cpm4; + for (i = 0; i < state->nbo; i++) { + ctx->bo[bo_id[i]].bo_flushed &= ~ctx->radeon->type[state->id].dirty_flags; + } return 0; } int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state) { - void *tmp; + unsigned ndw = 0; int r = 0; - /* !!! ONLY ACCEPT QUERY STATE HERE !!! */ - if (state->type != R600_QUERY_BEGIN_TYPE && state->type != R600_QUERY_END_TYPE) { - return -EINVAL; - } r = radeon_state_pm4(state); if (r) return r; - if ((ctx->draw_cpm4 + state->cpm4) > RADEON_CTX_MAX_PM4) { - /* need to flush */ - return -EBUSY; - } - if (state->cpm4 >= RADEON_CTX_MAX_PM4) { - fprintf(stderr, "%s single state too big %d, max %d\n", - __func__, state->cpm4, RADEON_CTX_MAX_PM4); + + /* !!! ONLY ACCEPT QUERY STATE HERE !!! */ + ndw = state->cpm4 + ctx->radeon->type[state->id].header_cpm4; + switch (state->id) { + case R600_QUERY_BEGIN: + /* account QUERY_END at same time of QUERY_BEGIN so we know we + * have room left for QUERY_END + */ + if ((ndw * 2) > ctx->npm4) { + /* need to flush */ + return -EBUSY; + } + ctx->npm4 -= ndw; + break; + case R600_QUERY_END: + /* add again ndw from previous accounting */ + ctx->npm4 += ndw; + break; + default: return -EINVAL; } - tmp = realloc(ctx->state, (ctx->nstate + 1) * sizeof(void*)); - if (tmp == NULL) - return -ENOMEM; - ctx->state = tmp; - ctx->state[ctx->nstate++] = radeon_state_incref(state); - /* BEGIN/END query are balanced in the same cs so account for END - * END query when scheduling BEGIN query - */ - if (state->type == R600_QUERY_BEGIN_TYPE) { - ctx->draw_cpm4 += state->cpm4 * 2; - } - return 0; + + return radeon_ctx_state_schedule(ctx, state); } -int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw) +int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) { - struct radeon_draw *pdraw = NULL; - struct radeon_draw **ndraw; - struct radeon_state *nstate, *ostate; - unsigned cpm4, i, cstate; - void *tmp; + unsigned i, previous_id; int r = 0; - ndraw = realloc(ctx->draw, sizeof(void*) * (ctx->ndraw + 1)); - if (ndraw == NULL) - return -ENOMEM; - ctx->draw = ndraw; for (i = 0; i < draw->nstate; i++) { r = radeon_ctx_state_bo(ctx, draw->state[i]); if (r) @@ -285,76 +307,17 @@ int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw) __func__, draw->cpm4, RADEON_CTX_MAX_PM4); return -EINVAL; } - tmp = realloc(ctx->state, (ctx->nstate + draw->nstate) * sizeof(void*)); - if (tmp == NULL) - return -ENOMEM; - ctx->state = tmp; - pdraw = ctx->cdraw; - for (i = 0, cpm4 = 0, cstate = ctx->nstate; i < draw->nstate - 1; i++) { - nstate = draw->state[i]; - if (nstate) { - if (pdraw && pdraw->state[i]) { - ostate = pdraw->state[i]; - if (ostate->pm4_crc != nstate->pm4_crc) { - ctx->state[cstate++] = nstate; - cpm4 += nstate->cpm4; - } - } else { - ctx->state[cstate++] = nstate; - cpm4 += nstate->cpm4; + previous_id = ctx->id; + for (i = 0; i < draw->nstate; i++) { + /* FIXME always force draw state to schedule */ + if (draw->state[i] && draw->state[i]->pm4_crc != ctx->state_crc32[draw->state[i]->id]) { + r = radeon_ctx_state_schedule(ctx, draw->state[i]); + if (r) { + ctx->id = previous_id; + return r; } } } - /* The last state is the draw state always add it */ - if (draw->state[i] == NULL) { - fprintf(stderr, "%s no draw command\n", __func__); - return -EINVAL; - } - ctx->state[cstate++] = draw->state[i]; - cpm4 += draw->state[i]->cpm4; - if ((ctx->draw_cpm4 + cpm4) > RADEON_CTX_MAX_PM4) { - /* need to flush */ - return -EBUSY; - } - ctx->draw_cpm4 += cpm4; - ctx->nstate = cstate; - ctx->draw[ctx->ndraw++] = draw; - ctx->cdraw = draw; - return 0; -} - -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw) -{ - int r; - - radeon_draw_incref(draw); - r = radeon_ctx_set_draw_new(ctx, draw); - if (r) - radeon_draw_decref(draw); - return r; -} - -int radeon_ctx_pm4(struct radeon_ctx *ctx) -{ - unsigned i; - int r; - - free(ctx->pm4); - ctx->cpm4 = 0; - ctx->pm4 = malloc(ctx->draw_cpm4 * 4); - if (ctx->pm4 == NULL) - return -EINVAL; - for (i = 0, ctx->id = 0; i < ctx->nstate; i++) { - r = radeon_ctx_state_schedule(ctx, ctx->state[i]); - if (r) - return r; - } - if (ctx->id != ctx->draw_cpm4) { - fprintf(stderr, "%s miss predicted pm4 size %d for %d\n", - __func__, ctx->draw_cpm4, ctx->id); - return -EINVAL; - } - ctx->cpm4 = ctx->draw_cpm4; return 0; } @@ -384,8 +347,8 @@ printf("%d relocs\n", ctx->nreloc); bof_decref(blob); blob = NULL; /* dump cs */ -printf("%d pm4\n", ctx->cpm4); - blob = bof_blob(ctx->cpm4 * 4, ctx->pm4); +printf("%d pm4\n", ctx->id); + blob = bof_blob(ctx->id * 4, ctx->pm4); if (blob == NULL) goto out_err; if (bof_object_set(root, "pm4", blob)) @@ -400,23 +363,23 @@ printf("%d pm4\n", ctx->cpm4); bo = bof_object(); if (bo == NULL) goto out_err; - size = bof_int32(ctx->bo[i]->size); + size = bof_int32(ctx->bo[i].bo->size); if (size == NULL) goto out_err; if (bof_object_set(bo, "size", size)) goto out_err; bof_decref(size); size = NULL; - handle = bof_int32(ctx->bo[i]->handle); + handle = bof_int32(ctx->bo[i].bo->handle); if (handle == NULL) goto out_err; if (bof_object_set(bo, "handle", handle)) goto out_err; bof_decref(handle); handle = NULL; - radeon_bo_map(ctx->radeon, ctx->bo[i]); - blob = bof_blob(ctx->bo[i]->size, ctx->bo[i]->data); - radeon_bo_unmap(ctx->radeon, ctx->bo[i]); + radeon_bo_map(ctx->radeon, ctx->bo[i].bo); + blob = bof_blob(ctx->bo[i].bo->size, ctx->bo[i].bo->data); + radeon_bo_unmap(ctx->radeon, ctx->bo[i].bo); if (blob == NULL) goto out_err; if (bof_object_set(bo, "data", blob)) |