From eb7ef433bbbeabda963e74adf0ef61c47883f292 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 22 Jun 2010 06:41:15 +0200 Subject: r300g: optimize the immediate mode emission path a bit --- src/gallium/drivers/r300/r300_cb.h | 3 +++ src/gallium/drivers/r300/r300_context.h | 3 +++ src/gallium/drivers/r300/r300_render.c | 40 ++++++++++++++--------------- src/gallium/drivers/r300/r300_state.c | 4 ++- src/gallium/drivers/r300/r300_winsys.h | 5 ++++ src/gallium/winsys/radeon/drm/radeon_r300.c | 12 +++++++++ 6 files changed, 45 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h index 9d3d4fc1b1..6987471244 100644 --- a/src/gallium/drivers/r300/r300_cb.h +++ b/src/gallium/drivers/r300/r300_cb.h @@ -89,6 +89,9 @@ CB_DEBUG(cs_count = size;) \ } while (0) +#define BEGIN_CS_AS_CB(r300, size) \ + BEGIN_CB(r300->rws->get_cs_pointer(r300->rws, dwords), dwords) + #define END_CB do { \ CB_DEBUG(if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index fdbdb4b192..8d0b4bb3d3 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -363,6 +363,9 @@ struct r300_vertex_element_state { enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + /* The size of the vertex, in dwords. */ + unsigned vertex_size_dwords; + /* This might mean two things: * - src_format != hw_format, as discussed above. * - src_offset % 4 != 0. */ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 655819001c..4afd124c0e 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -35,6 +35,7 @@ #include "util/u_prim.h" #include "r300_cs.h" +#include "r300_cb.h" #include "r300_context.h" #include "r300_screen_buffer.h" #include "r300_emit.h" @@ -43,6 +44,8 @@ #include +#define IMMD_DWORDS 32 + static uint32_t r300_translate_primitive(unsigned prim) { switch (prim) { @@ -269,7 +272,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, return FALSE; } - if (count > 10) { + if (count * r300->velems->vertex_size_dwords > IMMD_DWORDS) { return FALSE; } @@ -308,10 +311,10 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; unsigned vertex_element_count = r300->velems->count; - unsigned i, v, vbi, dw, elem_offset, dwords; + unsigned i, v, vbi, dwords; /* Size of the vertex, in dwords. */ - unsigned vertex_size = 0; + unsigned vertex_size = r300->velems->vertex_size_dwords; /* Offsets of the attribute, in dwords, from the start of the vertex. */ unsigned offset[PIPE_MAX_ATTRIBS]; @@ -327,14 +330,13 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL}; - CS_LOCALS(r300); + CB_LOCALS; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; offset[i] = velem->src_offset / 4; size[i] = r300->velems->hw_format_size[i] / 4; - vertex_size += size[i]; vbi = velem->vertex_buffer_index; /* Map the buffer. */ @@ -344,8 +346,8 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, vbuf->buffer, PIPE_TRANSFER_READ, &transfer[vbi]); - map[vbi] += vbuf->buffer_offset / 4; stride[vbi] = vbuf->stride / 4; + map[vbi] += vbuf->buffer_offset / 4 + stride[vbi] * start; } } @@ -353,30 +355,26 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); - BEGIN_CS(dwords); - OUT_CS_REG(R300_GA_COLOR_CONTROL, + BEGIN_CS_AS_CB(r300, dwords); + OUT_CB_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CB(count - 1); + OUT_CB(0); + OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); /* Emit vertices. */ for (v = 0; v < count; v++) { for (i = 0; i < vertex_element_count; i++) { - velem = &r300->velems->velem[i]; - vbi = velem->vertex_buffer_index; - elem_offset = offset[i] + stride[vbi] * (v + start); + vbi = r300->velems->velem[i].vertex_buffer_index; - for (dw = 0; dw < size[i]; dw++) { - OUT_CS(map[vbi][elem_offset + dw]); - } + OUT_CB_TABLE(&map[vbi][offset[i] + stride[vbi] * v], size[i]); } } - END_CS; + END_CB; /* Unmap buffers. */ for (i = 0; i < vertex_element_count; i++) { diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3f0acd9948..bc2b62ba54 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1500,11 +1500,13 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, /* Align the formats to the size of DWORD. * We only care about the blocksizes of the formats since - * swizzles are already set up. */ + * swizzles are already set up. + * Also compute the vertex size. */ for (i = 0; i < count; i++) { /* This is OK because we check for aligned strides too. */ velems->hw_format_size[i] = align(util_format_get_blocksize(velems->hw_format[i]), 4); + velems->vertex_size_dwords += velems->hw_format_size[i] / 4; } } } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 334ec9fa84..77c1c13ef9 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -105,6 +105,11 @@ struct r300_winsys_screen { /* Return the number of free dwords in CS. */ unsigned (*get_cs_free_dwords)(struct r300_winsys_screen *winsys); + /* Return the pointer to the first free dword in CS and assume a pipe + * driver wants to fill "count" dwords. */ + uint32_t *(*get_cs_pointer)(struct r300_winsys_screen *winsys, + unsigned count); + /* Write a dword to the command buffer. */ void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 237fdc8bac..d2d317dc20 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -192,6 +192,17 @@ static unsigned radeon_get_cs_free_dwords(struct r300_winsys_screen *rws) return cs->ndw - cs->cdw; } +static uint32_t *radeon_get_cs_pointer(struct r300_winsys_screen *rws, + unsigned count) +{ + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + struct radeon_cs *cs = ws->cs; + uint32_t *ptr = cs->packets + cs->cdw; + + cs->cdw += count; + return ptr; +} + static void radeon_write_cs_dword(struct r300_winsys_screen *rws, uint32_t dword) { @@ -316,6 +327,7 @@ radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) ws->base.validate = radeon_validate; ws->base.destroy = radeon_winsys_destroy; ws->base.get_cs_free_dwords = radeon_get_cs_free_dwords; + ws->base.get_cs_pointer = radeon_get_cs_pointer; ws->base.write_cs_dword = radeon_write_cs_dword; ws->base.write_cs_table = radeon_write_cs_table; ws->base.write_cs_reloc = radeon_write_cs_reloc; -- cgit v1.2.3