From 1f0348c4a2ba4f8ca812a2daccf3b01c3f984ef1 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 4 Dec 2010 04:38:15 +0100 Subject: r300g: rework command submission and resource space checking The motivation behind this rework is to get some speed by reducing CPU overhead. The performance increase depends on many factors, but it's measurable (I think it's about 10% increase in Torcs). This commit replaces libdrm's radeon_cs_gem with our own implemention. It's optimized specifically for r300g, but r600g could use it as well. Reloc writes and space checking are faster and simpler than their counterparts in libdrm (the time complexity of all the functions is O(1) in nearly all scenarios, thanks to hashing). (libdrm's radeon_bo_gem is still being used in the driver.) It works like this: cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and also adds the size of 'buf' to the used_gart and used_vram winsys variables based on the domains, which are simply or'd for the accounting purposes. The adding is skipped if the reloc is already present in the list, but it accounts any newly-referenced domains. cs_validate is then called, which just checks: used_vram/gart < vram/gart_size * 0.8 The 0.8 number allows for some memory fragmentation. If the validation fails, the pipe driver flushes CS and tries do the validation again, i.e. it validates only that one operation. If it fails again, it drops the operation on the floor and prints some nasty message to stderr. cs_write_reloc(cs, buf) just writes a reloc that has been added using cs_add_reloc. The read_domain and write_domain parameters have been removed, because we already specify them in cs_add_reloc. The space checking has been tested by putting small values in vram/gart_size variables. --- src/gallium/drivers/r300/r300_cs.h | 16 ++-- src/gallium/drivers/r300/r300_emit.c | 129 +++++++++++++++++---------------- src/gallium/drivers/r300/r300_flush.c | 4 + src/gallium/drivers/r300/r300_render.c | 3 +- src/gallium/drivers/r300/r300_winsys.h | 41 ++++------- 5 files changed, 96 insertions(+), 97 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 67fb0096a8..6726f100e1 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -106,26 +106,26 @@ * Writing relocations. */ -#define OUT_CS_RELOC(bo, offset, rd, wd) do { \ +#define OUT_CS_RELOC(bo, offset) do { \ assert(bo); \ OUT_CS(offset); \ - cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \ + cs_winsys->cs_write_reloc(cs_copy, bo); \ CS_DEBUG(cs_count -= 2;) \ } while (0) -#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \ +#define OUT_CS_BUF_RELOC(bo, offset) do { \ assert(bo); \ - OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset, rd, wd); \ + OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset); \ } while (0) -#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \ +#define OUT_CS_TEX_RELOC(tex, offset) do { \ assert(tex); \ - OUT_CS_RELOC(tex->cs_buffer, offset, rd, wd); \ + OUT_CS_RELOC(tex->cs_buffer, offset); \ } while (0) -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \ +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo) do { \ assert(bo); \ - cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf, rd, wd); \ + cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf); \ CS_DEBUG(cs_count -= 2;) \ } while (0) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 41a12708ce..d14cdcbbaf 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -353,10 +353,10 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) if (aa->dest) { OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset, 0, aa->dest->domain); + OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset); OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch, 0, aa->dest->domain); + OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch); } OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); @@ -392,10 +392,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) surf = r300_surface(fb->cbufs[i]); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->offset); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->pitch); } /* Set up the ZB part of the CBZB clear. */ @@ -405,10 +405,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch); DBG(r300, DBG_CBZB, "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, @@ -421,10 +421,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->format); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->offset); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->pitch); if (can_hyperz) { uint32_t surf_pitch; @@ -589,28 +589,24 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 3) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 3) * 4); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 2) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 2) * 4); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -632,7 +628,7 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain); + OUT_CS_RELOC(buf, query->num_results * 4); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -646,10 +642,10 @@ static void rv530_emit_query_end_double_z(struct r300_context *r300, BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -810,8 +806,7 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain, - 0); + OUT_CS_TEX_RELOC(tex, texstate->format.tile_config); } } END_CS; @@ -899,7 +894,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde for (i = 0; i < vertex_array_count; i++) { buf = r300_buffer(valid_vbuf[velem[i].vertex_buffer_index]); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0); + OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b); } END_CS; } @@ -924,7 +919,7 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0); + OUT_CS_BUF_RELOC(r300->vbo, 0); END_CS; } @@ -1220,65 +1215,77 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, boolean do_validate_vertex_buffers, struct pipe_resource *index_buffer) { - struct pipe_framebuffer_state* fb = + struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_textures_state *texstate = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture* tex; - struct pipe_resource **vbuf = r300->valid_vertex_buffer; + struct r300_texture *tex; unsigned i; - - /* Clean out BOs. */ - r300->rws->cs_reset_buffers(r300->cs); - - /* Color buffers... */ - for (i = 0; i < fb->nr_cbufs; i++) { - tex = r300_texture(fb->cbufs[i]->texture); - assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0, - r300_surface(fb->cbufs[i])->domain); - } - /* ...depth buffer... */ - if (fb->zsbuf) { - tex = r300_texture(fb->zsbuf->texture); - assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0, - r300_surface(fb->zsbuf)->domain); - } - /* ...textures... */ - for (i = 0; i < texstate->count; i++) { - if (!(texstate->tx_enable & (1 << i))) { - continue; + boolean flushed = FALSE; + +validate: + if (r300->fb_state.dirty) { + /* Color buffers... */ + for (i = 0; i < fb->nr_cbufs; i++) { + tex = r300_texture(fb->cbufs[i]->texture); + assert(tex && tex->buffer && "cbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + r300_surface(fb->cbufs[i])->domain); } + /* ...depth buffer... */ + if (fb->zsbuf) { + tex = r300_texture(fb->zsbuf->texture); + assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + r300_surface(fb->zsbuf)->domain); + } + } + if (r300->textures_state.dirty) { + /* ...textures... */ + for (i = 0; i < texstate->count; i++) { + if (!(texstate->tx_enable & (1 << i))) { + continue; + } - tex = r300_texture(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, tex->domain, 0); + tex = r300_texture(texstate->sampler_views[i]->base.texture); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, tex->domain, 0); + } } /* ...occlusion query buffer... */ if (r300->query_current) - r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buffer, - 0, r300->query_current->domain); + r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buffer, + 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->cs_buf, - r300_buffer(r300->vbo)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_buffer(r300->vbo)->cs_buf, + r300_buffer(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers) { - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (!vbuf[i]) + struct pipe_resource **buf = r300->valid_vertex_buffer; + struct pipe_resource **last = r300->valid_vertex_buffer + + r300->vertex_buffer_count; + for (; buf != last; buf++) { + if (!*buf) continue; - r300->rws->cs_add_buffer(r300->cs, r300_buffer(vbuf[i])->cs_buf, - r300_buffer(vbuf[i])->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_buffer(*buf)->cs_buf, + r300_buffer(*buf)->domain, 0); } } /* ...and index buffer for HWTCL path. */ if (index_buffer) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->cs_buf, - r300_buffer(index_buffer)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_buffer(index_buffer)->cs_buf, + r300_buffer(index_buffer)->domain, 0); + /* Now do the validation. */ if (!r300->rws->cs_validate(r300->cs)) { - return FALSE; + /* Ooops, an infinite loop, give up. */ + if (flushed) + return FALSE; + + r300->context.flush(&r300->context, 0, NULL); + flushed = TRUE; + goto validate; } return TRUE; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 7a26a86a91..b250532ba9 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -72,6 +72,10 @@ static void r300_flush(struct pipe_context* pipe, r300->validate_buffers = TRUE; r300->upload_vb_validated = FALSE; r300->upload_ib_validated = FALSE; + } else { + /* Even if hw is not dirty, we should at least reset the CS in case + * the space checking failed for the first draw operation. */ + r300->rws->cs_flush(r300->cs); } /* reset flushed query */ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index dfa3045d64..e660ca68f1 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -537,8 +537,7 @@ static void r300_emit_draw_elements(struct r300_context *r300, OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_BUF_RELOC(indexBuffer, count_dwords, - r300_buffer(indexBuffer)->domain, 0); + OUT_CS_BUF_RELOC(indexBuffer, count_dwords); END_CS; } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index b8324afe51..460da77a4f 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -42,8 +42,8 @@ struct r300_winsys_buffer; /* for map/unmap etc. */ struct r300_winsys_cs_buffer; /* for write_reloc etc. */ struct r300_winsys_cs { - unsigned cdw; /* Number of used dwords. */ - uint32_t *buf; /* The command buffer. */ + unsigned cdw; /* Number of used dwords. */ + uint32_t buf[R300_MAX_CMDBUF_DWORDS]; /* The command buffer. */ }; enum r300_value_id { @@ -239,23 +239,22 @@ struct r300_winsys_screen { void (*cs_destroy)(struct r300_winsys_cs *cs); /** - * Add a buffer object to the list of buffers to validate. + * Add a new buffer relocation. Every relocation must first be added + * before it can be written. * - * \param cs A command stream to add buffer for validation against. - * \param buf A winsys buffer to validate. - * \param rd A read domain containing a bitmask - * of the R300_DOMAIN_* flags. - * \param wd A write domain containing a bitmask - * of the R300_DOMAIN_* flags. + * \param cs A command stream to add buffer for validation against. + * \param buf A winsys buffer to validate. + * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags. + * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ - void (*cs_add_buffer)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + void (*cs_add_reloc)(struct r300_winsys_cs *cs, + struct r300_winsys_cs_buffer *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd); /** - * Revalidate all currently set up winsys buffers. - * Returns TRUE if a flush is required. + * Return TRUE if there is enough memory in VRAM and GTT for the relocs + * added so far. * * \param cs A command stream to validate. */ @@ -270,9 +269,7 @@ struct r300_winsys_screen { * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ void (*cs_write_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + struct r300_winsys_cs_buffer *buf); /** * Flush a command stream. @@ -293,14 +290,6 @@ struct r300_winsys_screen { void (*flush)(void *), void *user); - /** - * Reset the list of buffer objects to validate, usually called - * prior to adding buffer objects for validation. - * - * \param cs A command stream to reset buffers for. - */ - void (*cs_reset_buffers)(struct r300_winsys_cs *cs); - /** * Return TRUE if a buffer is referenced by a command stream or by hardware * (i.e. is busy), based on the domain parameter. -- cgit v1.2.3