From 112239e9a66a155d36fe2ad0ab130e6f26eff298 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 18 Jan 2010 00:15:52 +0100 Subject: r300g,radeong: finish and enable the immediate mode Nearly 100% performance increase in glxgears. --- src/gallium/drivers/r300/r300_emit.c | 22 ----- src/gallium/drivers/r300/r300_render.c | 141 ++++++++++++++++++++++++++------- src/gallium/drivers/r300/r300_state.c | 22 +++++ 3 files changed, 133 insertions(+), 52 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 36d2c64b58..badbf3715c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -772,22 +772,6 @@ void r300_emit_texture(struct r300_context* r300, END_CS; } -static boolean r300_validate_aos(struct r300_context *r300) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; - int i; - - /* Check if formats and strides are aligned to the size of DWORD. */ - for (i = 0; i < r300->vertex_element_count; i++) { - if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || - util_format_get_blocksize(velem[i].src_format) % 4 != 0) { - return FALSE; - } - } - return TRUE; -} - void r300_emit_aos(struct r300_context* r300, unsigned offset) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; @@ -797,12 +781,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); - /* XXX Move this checking to a more approriate place. */ - if (!r300_validate_aos(r300)) { - /* XXX We should fallback using Draw. 
*/ - assert(0); - } - BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(aos_count); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 677031ef04..7f095bffe7 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -28,6 +28,7 @@ #include "pipe/p_inlines.h" +#include "util/u_format.h" #include "util/u_memory.h" #include "util/u_prim.h" @@ -114,20 +115,53 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } -static void r300_emit_draw_immediate(struct r300_context *r300, - unsigned mode, - unsigned start, - unsigned count) + +static void r300_emit_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) { - struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer; - unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float); - unsigned i; - uint32_t* map; + struct pipe_vertex_element* velem; + struct pipe_vertex_buffer* vbuf; + unsigned vertex_element_count = r300->vertex_element_count; + unsigned i, v, vbi, dw, elem_offset; + + /* Size of the vertex, in dwords. */ + unsigned vertex_size = 0; + + /* Offsets of the attribute, in dwords, from the start of the vertex. */ + unsigned offset[PIPE_MAX_ATTRIBS]; + + /* Size of the vertex element, in dwords. */ + unsigned size[PIPE_MAX_ATTRIBS]; + + /* Stride to the same attrib in the next vertex in the vertex buffer, + * in dwords. */ + unsigned stride[PIPE_MAX_ATTRIBS]; + + /* Mapped vertex buffers. */ + uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; + CS_LOCALS(r300); - map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo, - start * vertex_size, count * vertex_size, - PIPE_BUFFER_USAGE_CPU_READ); + /* Calculate the vertex size, offsets, strides etc. and map the buffers. 
*/ + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->vertex_element[i]; + offset[i] = velem->src_offset >> 2; + size[i] = util_format_get_blocksize(velem->src_format) >> 2; + vertex_size += size[i]; + vbi = velem->vertex_buffer_index; + + /* Map the buffer. */ + if (!map[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + map[vbi] = (uint32_t*)pipe_buffer_map(r300->context.screen, + vbuf->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + map[vbi] += vbuf->buffer_offset >> 2; + stride[vbi] = vbuf->stride >> 2; + } + } BEGIN_CS(10 + count * vertex_size); OUT_CS_REG(R300_GA_COLOR_CONTROL, @@ -138,18 +172,31 @@ static void r300_emit_draw_immediate(struct r300_context *r300, OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); - //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size); - for (i = 0; i < count * vertex_size; i++) { - if (i % vertex_size == 0) { - //debug_printf("r300: -- vert --\n"); + + /* Emit vertices. */ + for (v = 0; v < count; v++) { + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->vertex_element[i]; + vbi = velem->vertex_buffer_index; + elem_offset = offset[i] + stride[vbi] * (v + start); + + for (dw = 0; dw < size[i]; dw++) { + OUT_CS(map[vbi][elem_offset + dw]); + } } - //debug_printf("r300: 0x%08x\n", *map); - OUT_CS(*map); - map++; } END_CS; - pipe_buffer_unmap(r300->context.screen, vbo); + /* Unmap buffers. 
*/ + for (i = 0; i < vertex_element_count; i++) { + vbi = r300->vertex_element[i].vertex_buffer_index; + + if (map[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + pipe_buffer_unmap(r300->context.screen, vbuf->buffer); + map[vbi] = 0; + } + } } static void r300_emit_draw_arrays(struct r300_context *r300, @@ -222,16 +269,49 @@ static void r300_emit_draw_elements(struct r300_context *r300, } +static boolean r300_setup_local_vertex_buffers(struct r300_context *r300) +{ + struct pipe_vertex_buffer *vb; + boolean found_local_bo = FALSE, found_managed_bo = FALSE; + unsigned i; + + /* See what buffers we got. */ + for (i = 0; i < r300->vertex_element_count; i++) { + vb = &r300->vertex_buffer[r300->vertex_element[i].vertex_buffer_index]; + if (r300->winsys->buffer_is_local(r300->winsys, vb->buffer)) { + found_local_bo = TRUE; + } else { + found_managed_bo = TRUE; + } + } + + /* If we found both local and managed buffers, make local buffers managed + * because we shouldn't use the immediate mode in case a managed buffer is + * present, due to performance reasons. 
*/ + if (found_local_bo && found_managed_bo) { + for (i = 0; i < r300->vertex_element_count; i++) { + vb = &r300->vertex_buffer[r300->vertex_element[i].vertex_buffer_index]; + if (r300->winsys->buffer_is_local(r300->winsys, vb->buffer)) { + r300->winsys->buffer_make_managed(r300->winsys, vb->buffer); + } + } + } + + return !found_managed_bo; +} + static boolean r300_setup_vertex_buffers(struct r300_context *r300) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->vertex_element; + struct pipe_buffer *pbuf; validate: for (int i = 0; i < r300->vertex_element_count; i++) { - if (!r300->winsys->add_buffer(r300->winsys, - vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, 0)) { + pbuf = vbuf[velem[i].vertex_buffer_index].buffer; + + if (!r300->winsys->add_buffer(r300->winsys, pbuf, + RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -245,6 +325,7 @@ validate: return TRUE; } + static void r300_shorten_ubyte_elts(struct r300_context* r300, struct pipe_buffer** elts, unsigned count) @@ -365,15 +446,15 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_buffer_validate(r300); - if (!r300_setup_vertex_buffers(r300)) { - return; - } - - r300_emit_dirty_state(r300); - - if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) { - r300_emit_draw_immediate(r300, mode, start, count); + if (r300_setup_local_vertex_buffers(r300)) { + r300_emit_dirty_state(r300); + r300_emit_draw_arrays_immediate(r300, mode, start, count); } else { + if (!r300_setup_vertex_buffers(r300)) { + return; + } + + r300_emit_dirty_state(r300); r300_emit_aos(r300, start); r300_emit_draw_arrays(r300, mode, count); } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index e2ec0bc5bd..641e95e7fc 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -924,6 +924,22 @@ static void 
r300_set_vertex_buffers(struct pipe_context* pipe, r300->dirty_state |= R300_NEW_VERTEX_FORMAT; } +static boolean r300_validate_aos(struct r300_context *r300) +{ + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; + int i; + + /* Check if formats and strides are aligned to the size of DWORD. */ + for (i = 0; i < r300->vertex_element_count; i++) { + if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || + util_format_get_blocksize(velem[i].src_format) % 4 != 0) { + return FALSE; + } + } + return TRUE; +} + static void r300_set_vertex_elements(struct pipe_context* pipe, unsigned count, const struct pipe_vertex_element* elements) @@ -939,6 +955,12 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, draw_flush(r300->draw); draw_set_vertex_elements(r300->draw, count, elements); } + + if (!r300_validate_aos(r300)) { + /* XXX We should fall back to using draw. */ + assert(0); + abort(); + } } static void* r300_create_vs_state(struct pipe_context* pipe, -- cgit v1.2.3