From c1bee7bdea470b6b5dcebef9aacc8fe4feca687c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2009 16:53:12 +1000 Subject: r300g: fixup arb occlusion query support. 1: add rv530 support - num z pipes cap - add proper start/finish query options for rv530 2: convert to use linked list properly. 3: add flushing required check. 4: initial Z top disabling support. TODO: make it actually work on my rv530. --- src/gallium/drivers/r300/r300_chipset.h | 2 + src/gallium/drivers/r300/r300_context.c | 10 ++-- src/gallium/drivers/r300/r300_context.h | 4 +- src/gallium/drivers/r300/r300_emit.c | 69 ++++++++++++++++++++---- src/gallium/drivers/r300/r300_flush.c | 10 +++- src/gallium/drivers/r300/r300_query.c | 42 ++++++++------- src/gallium/drivers/r300/r300_reg.h | 15 ++++-- src/gallium/drivers/r300/r300_screen.c | 1 + src/gallium/drivers/r300/r300_state.c | 11 ++-- src/gallium/drivers/r300/r300_winsys.h | 3 ++ src/gallium/winsys/drm/radeon/core/radeon_r300.c | 10 ++++ 11 files changed, 134 insertions(+), 43 deletions(-) diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 322d4a57e4..f015a4243d 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -36,6 +36,8 @@ struct r300_capabilities { int num_vert_fpus; /* The number of fragment pipes */ int num_frag_pipes; + /* The number of z pipes */ + int num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; /* Whether or not this is an RV515 or newer; R500s have many differences diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index e6bc80e48f..7a9c098e30 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -99,11 +99,9 @@ static void r300_destroy_context(struct pipe_context* context) { context->screen->buffer_destroy(r300->oqbo); /* If there are any queries pending or not destroyed, remove them now. 
*/ - if (r300->query_list) { - foreach_s(query, temp, r300->query_list) { - remove_from_list(query); - FREE(query); - } + foreach_s(query, temp, &r300->query_list) { + remove_from_list(query); + FREE(query); } FREE(r300->blend_color_state); @@ -201,6 +199,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw++; - + make_empty_list(&r300->query_list); return &r300->context; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 086633f732..9b0094b63c 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -172,6 +172,8 @@ struct r300_query { unsigned int count; /* The offset of this query into the query buffer, in bytes. */ unsigned offset; + /* if we've flushed the query */ + boolean flushed; /* Linked list members. */ struct r300_query* prev; struct r300_query* next; @@ -237,7 +239,7 @@ struct r300_context { /* Occlusion query buffer. */ struct pipe_buffer* oqbo; /* Query list. */ - struct r300_query* query_list; + struct r300_query query_list; /* Various CSO state objects. */ /* Blend state. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 64748ad8f8..3d28249c16 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -323,28 +323,30 @@ void r300_emit_fb_state(struct r300_context* r300, void r300_emit_query_begin(struct r300_context* r300, struct r300_query* query) { + struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); /* XXX This will almost certainly not return good results * for overlapping queries. 
*/ - BEGIN_CS(2); + BEGIN_CS(4); + if (caps->family == CHIP_FAMILY_RV530) { + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + } else { + OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + } OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; } -void r300_emit_query_end(struct r300_context* r300, - struct r300_query* query) + +static void r300_emit_query_finish(struct r300_context *r300, + struct r300_query *query) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - debug_printf("r300: There wasn't room for the OQ buffer!?" - " Oh noes!\n"); - } - assert(caps->num_frag_pipes); + BEGIN_CS(6 * caps->num_frag_pipes + 2); /* I'm not so sure I like this switch, but it's hard to be elegant * when there's so many special cases... @@ -394,6 +396,55 @@ void r300_emit_query_end(struct r300_context* r300, } +static void rv530_emit_query_single(struct r300_context *r300, + struct r300_query *query) +{ + CS_LOCALS(r300); + + BEGIN_CS(8); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_CS; +} + +static void rv530_emit_query_double(struct r300_context *r300, + struct r300_query *query) +{ + CS_LOCALS(r300); + + BEGIN_CS(14); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_CS; +} + +void 
r300_emit_query_end(struct r300_context* r300, + struct r300_query* query) +{ + struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; + + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + debug_printf("r300: There wasn't room for the OQ buffer!?" + " Oh noes!\n"); + } + + if (caps->family == CHIP_FAMILY_RV530) { + if (caps->num_z_pipes == 2) + rv530_emit_query_double(r300, query); + else + rv530_emit_query_single(r300, query); + } else + r300_emit_query_finish(r300, query); +} + void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) { CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 0dff1c6f4f..a8ab0d7212 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -26,9 +26,10 @@ static void r300_flush(struct pipe_context* pipe, unsigned flags, struct pipe_fence_handle** fence) { - struct r300_context* r300 = r300_context(pipe); - CS_LOCALS(r300); + struct r300_context *r300 = r300_context(pipe); + struct r300_query *query; + CS_LOCALS(r300); /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. 
*/ if (!r300->draw->flushing) { @@ -41,8 +42,13 @@ static void r300_flush(struct pipe_context* pipe, r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; } + /* reset flushed query */ + foreach(query, &r300->query_list) { + query->flushed = TRUE; + } } + void r300_init_flush_functions(struct r300_context* r300) { r300->context.flush = r300_flush; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 2880d34877..b01313648b 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -24,13 +24,13 @@ #include "r300_emit.h" -static struct pipe_query* r300_create_query(struct pipe_context* pipe, +static struct pipe_query *r300_create_query(struct pipe_context *pipe, unsigned query_type) { - struct r300_context* r300 = r300_context(pipe); - struct r300_screen* r300screen = r300_screen(r300->context.screen); - unsigned query_size = r300screen->caps->num_frag_pipes * 4; - struct r300_query* q, * qptr; + struct r300_context *r300 = r300_context(pipe); + struct r300_screen *r300screen = r300_screen(r300->context.screen); + unsigned query_size; + struct r300_query *q, *qptr; q = CALLOC_STRUCT(r300_query); @@ -39,13 +39,16 @@ static struct pipe_query* r300_create_query(struct pipe_context* pipe, q->active = FALSE; - if (!r300->query_list) { - r300->query_list = q; - } else if (!is_empty_list(r300->query_list)) { - qptr = last_elem(r300->query_list); + if (r300screen->caps->family == CHIP_FAMILY_RV530) + query_size = r300screen->caps->num_z_pipes * sizeof(uint32_t); + else + query_size = r300screen->caps->num_frag_pipes * sizeof(uint32_t); + + if (!is_empty_list(&r300->query_list)) { + qptr = last_elem(&r300->query_list); q->offset = qptr->offset + query_size; - insert_at_tail(r300->query_list, q); } + insert_at_tail(&r300->query_list, q); /* XXX */ if (q->offset >= 4096) { @@ -74,9 +77,10 @@ static void r300_begin_query(struct pipe_context* pipe, map = 
pipe->screen->buffer_map(pipe->screen, r300->oqbo, PIPE_BUFFER_USAGE_CPU_WRITE); map += q->offset / 4; - *map = ~0; + *map = ~0U; pipe->screen->buffer_unmap(pipe->screen, r300->oqbo); + q->flushed = FALSE; r300_emit_dirty_state(r300); r300_emit_query_begin(r300, q); } @@ -98,28 +102,30 @@ static boolean r300_get_query_result(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_query* q = (struct r300_query*)query; + struct r300_query *q = (struct r300_query*)query; unsigned flags = PIPE_BUFFER_USAGE_CPU_READ; uint32_t* map; - uint32_t temp; + uint32_t temp = 0; unsigned i; - if (wait) { + if (q->flushed == FALSE) pipe->flush(pipe, 0, NULL); - } else { + if (!wait) { flags |= PIPE_BUFFER_USAGE_DONTBLOCK; } map = pipe->screen->buffer_map(pipe->screen, r300->oqbo, flags); + if (!map) + return FALSE; map += q->offset / 4; for (i = 0; i < r300screen->caps->num_frag_pipes; i++) { - if (*map == ~0) { + if (*map == ~0U) { /* Looks like our results aren't ready yet. */ if (wait) { debug_printf("r300: Despite waiting, OQ results haven't" " come in yet.\n"); } - temp = ~0; + temp = ~0U; break; } temp += *map; @@ -127,7 +133,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, } pipe->screen->buffer_unmap(pipe->screen, r300->oqbo); - if (temp == ~0) { + if (temp == ~0U) { /* Our results haven't been written yet... */ return FALSE; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 3abff5db62..ae94bb9b9f 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1172,6 +1172,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
/* SU Depth Offset value */ #define R300_SU_DEPTH_OFFSET 0x42c4 +#define R300_SU_REG_DEST 0x42c8 +# define R300_RASTER_PIPE_SELECT_0 (1 << 0) +# define R300_RASTER_PIPE_SELECT_1 (1 << 1) +# define R300_RASTER_PIPE_SELECT_2 (1 << 2) +# define R300_RASTER_PIPE_SELECT_3 (1 << 3) +# define R300_RASTER_PIPE_SELECT_ALL 0xf + /* BEGIN: Rasterization / Interpolators - many guesses */ @@ -2095,6 +2102,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_FG_ALPHA_VALUE 0x4be0 # define R500_FG_ALPHA_VALUE_MASK 0x0000ffff +#define RV530_FG_ZBREG_DEST 0x4be8 +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0) /* gap */ /* Fragment program parameters in 7.16 floating point */ @@ -3313,10 +3324,6 @@ enum { #define R200_3D_DRAW_IMMD_2 0xC0003500 -/* XXX Oh look, stuff not brought over from docs yet */ - -#define R300_SU_REG_DEST 0x42C8 - #endif /* _R300_REG_H */ /* *INDENT-ON* */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 7d154576e0..5381651c77 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -395,6 +395,7 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) caps->pci_id = r300_winsys->pci_id; caps->num_frag_pipes = r300_winsys->gb_pipes; + caps->num_z_pipes = r300_winsys->z_pipes; r300_parse_chipset(caps); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3cef285dee..d8533ac168 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -190,6 +190,7 @@ static void* r300_create_dsa_state(struct pipe_context* pipe, const struct pipe_depth_stencil_alpha_state* state) { + struct r300_context* r300 = r300_context(pipe); struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); /* Depth test setup. 
*/ @@ -247,11 +248,15 @@ static void* R300_FG_ALPHA_FUNC_ENABLE; dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f, 0, 1023); - } else { - /* XXX need to fix this to be dynamically set - dsa->z_buffer_top = R300_ZTOP_ENABLE; */ } + dsa->z_buffer_top = R300_ZTOP_ENABLE; + /* XXX TODO: add frag prog rules for ztop disable */ + if (state->alpha.enabled && state->alpha.func != PIPE_FUNC_ALWAYS) + dsa->z_buffer_top = R300_ZTOP_DISABLE; + if (!is_empty_list(&r300->query_list)) + dsa->z_buffer_top = R300_ZTOP_DISABLE; + return (void*)dsa; } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 540f8eca92..864a6146b2 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -48,6 +48,9 @@ struct r300_winsys { /* GB pipe count */ uint32_t gb_pipes; + /* Z pipe count (rv530 only) */ + uint32_t z_pipes; + /* GART size. */ uint32_t gart_size; diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index 3587892e00..7ea5d1fb4e 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -164,6 +164,16 @@ static void do_ioctls(struct r300_winsys* winsys, int fd) } winsys->gb_pipes = target; + /* get Z pipes */ + info.request = RADEON_INFO_NUM_Z_PIPES; + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (retval) { + fprintf(stderr, "%s: Failed to get GB pipe count, " + "error number %d\n", __FUNCTION__, retval); + exit(1); + } + winsys->z_pipes = target; + /* Then, get PCI ID */ info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); -- cgit v1.2.3