From db299a9f8244d53d9041fcdbd396a77ebe1f9e3e Mon Sep 17 00:00:00 2001
From: Marek Olšák
Date: Tue, 25 Jan 2011 05:37:52 +0100
Subject: r300g: fix some bugs with zbuffer compression (v4)

This drops the memblock manager for ZMASK. Instead, only one zbuffer can be
compressed at a time. Note that this does not necessarily have to be slower.
When there is a large number of zbuffers, compression might be used more often
than it was before. It's also easier to debug.

How it works:
1) 'clear' turns the compression on.
2) If some other zbuffer is set or the currently-bound zbuffer is used
   for texturing, the driver decompresses it and then turns the compression off.

Notes:
- The ZMASK clear has been refactored, so that only one packet3 is used to clear
  ZMASK.
- The 8x8 compression mode is disabled. I couldn't make it work without issues.
- Also removed driver-specific stuff from u_blitter.

Driver status:
- RV530 and R580 appear to just work (finally).
- RV570 should work, but there may be an issue that we don't correctly calculate
  the number of dwords to clear, resulting in a partially uninitialized zbuffer.
- RS690 misrenders as if no ZMASK clear happened. No idea what's going on.
- RV350 may even hardlock. This issue was already present and this patch doesn't
  fix it.

I think we are still missing some hardware info we need to make the zbuffer
compression work fully.

Note that there is also an issue with HiZ, resulting in a sort of blocky
zigzagged corruption around some objects.
--- src/gallium/drivers/r300/r300_blit.c | 227 ++++++++++++++------------ src/gallium/drivers/r300/r300_chipset.c | 2 + src/gallium/drivers/r300/r300_chipset.h | 13 +- src/gallium/drivers/r300/r300_context.c | 17 +- src/gallium/drivers/r300/r300_context.h | 25 +-- src/gallium/drivers/r300/r300_emit.c | 88 +++++----- src/gallium/drivers/r300/r300_hyperz.c | 120 +++----------- src/gallium/drivers/r300/r300_hyperz.h | 2 +- src/gallium/drivers/r300/r300_state.c | 66 +++++--- src/gallium/drivers/r300/r300_state_derived.c | 28 +++- src/gallium/drivers/r300/r300_texture.c | 2 - 11 files changed, 299 insertions(+), 291 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index e195128d26..f24d5582e1 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -58,8 +58,9 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count, r300->vertex_buffer); - if (op & (R300_CLEAR_SURFACE | R300_COPY)) + if (op & (R300_CLEAR_SURFACE | R300_COPY)) { util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); + } if (op & R300_COPY) { struct r300_textures_state* state = @@ -108,6 +109,23 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, return r300_surface(fb->cbufs[0])->cbzb_allowed; } +static boolean r300_fast_zclear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + /* Cannot decompress zmask with a 16-bit zbuffer. + * Also compression causes a hung. */ + if (util_format_get_blocksizebits(fb->zsbuf->texture->format) == 16) + return FALSE; + + /* Cannot use compression with a linear zbuffer. 
*/ + if (!r300_texture(fb->zsbuf->texture)->desc.microtile) + return FALSE; + + return TRUE; +} + static uint32_t r300_depth_clear_value(enum pipe_format format, double depth, unsigned stencil) { @@ -132,37 +150,46 @@ static void r300_clear(struct pipe_context* pipe, double depth, unsigned stencil) { - /* My notes about fastfill: + /* My notes about Zbuffer compression: * - * 1) Only the zbuffer is cleared. + * 1) The zbuffer must be micro-tiled and whole microtiles must be + * written if compression is enabled. If microtiling is disabled, + * it locks up. * - * 2) The zbuffer must be micro-tiled and whole microtiles must be - * written. If microtiling is disabled, it locks up. + * 2) There is ZMASK RAM which contains a compressed zbuffer. + * Each dword of the Z Mask contains compression information + * for 16 4x4 pixel tiles, that is 2 bits for each tile. + * On chips with 2 Z pipes, every other dword maps to a different + * pipe. On newer chipsets, there is a new compression mode + * with 8x8 pixel tiles per 2 bits. * - * 3) There is Z Mask RAM which contains a compressed zbuffer and - * it interacts with fastfill. We should figure out how to use it - * to get more performance. - * This is what we know about the Z Mask: + * 3) The FASTFILL bit has nothing to do with filling. It only tells hw + * it should look in the ZMASK RAM first before fetching from a real + * zbuffer. * - * Each dword of the Z Mask contains compression information - * for 16 4x4 pixel blocks, that is 2 bits for each block. - * On chips with 2 Z pipes, every other dword maps to a different - * pipe. + * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned + * during zbuffer reads instead of the value that is actually stored + * in the zbuffer memory. A pixel is in a cleared state when its ZMASK + * is equal to 0. 
Therefore, if you clear ZMASK with zeros, you may + * leave the zbuffer memory uninitialized, but then you must enable + * compression, so that the ZMASK RAM is actually used. * - * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must - * be equal to 0. (clear the Z Mask RAM with zeros) + * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed + * during zbuffer updates. A special decompressing operation should be + * used to fully decompress a zbuffer, which basically just stores all + * compressed tiles in ZMASK to the zbuffer memory. * - * 5) For 16-bit zbuffer, compression causes a hung with one or + * 6) For a 16-bit zbuffer, compression causes a hung with one or * two samples and should not be used. * - * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears + * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears * to avoid needless decompression. * - * 7) Fastfill must not be used if reading of compressed Z data is disabled + * 8) Fastfill must not be used if reading of compressed Z data is disabled * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE), * i.e. it cannot be used to compress the zbuffer. * - * 8) ZB_CB_CLEAR does not interact with fastfill in any way. + * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way. * * - Marek */ @@ -179,25 +206,23 @@ static void r300_clear(struct pipe_context* pipe, boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; - /* Decompress zbuffers that are bound as textures. If we didn't flush here, - * it would happen inside the blitter when updating derived state, - * causing a blitter operation to be called from inside the blitter, - * which would overwrite saved states and they would never get restored. */ - r300_flush_depth_textures(r300); - /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. 
*/ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { hyperz_dcv = hyperz->zb_depthclearvalue = r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) { + if (r300_fast_zclear_allowed(r300)) { r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } + if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) r300_mark_atom_dirty(r300, &r300->hiz_clear); + + /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state); + * once hiz offset is constant. */ + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Enable CBZB clear. */ @@ -211,7 +236,7 @@ static void r300_clear(struct pipe_context* pipe, height = surf->cbzb_height; r300->cbzb_clear = TRUE; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Clear. */ @@ -225,7 +250,7 @@ static void r300_clear(struct pipe_context* pipe, buffers, rgba, depth, stencil); r300_blitter_end(r300); } else if (r300->zmask_clear.dirty) { - /* Just clear zmask and hiz now, this does not use a standard draw + /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ unsigned dwords; @@ -257,16 +282,15 @@ static void r300_clear(struct pipe_context* pipe, if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; hyperz->zb_depthclearvalue = hyperz_dcv; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Enable fastfill and/or hiz. * * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update * looks if zmask/hiz is in use and enables fastfill accordingly. 
*/ - if (zstex && - (zstex->zmask_in_use[fb->zsbuf->u.tex.level] || - zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + if (r300->zmask_in_use || + (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -280,16 +304,16 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); - /* Decompress zbuffers that are bound as textures. If we didn't flush here, - * it would happen inside the blitter when updating derived state, - * causing a blitter operation to be called from inside the blitter, - * which would overwrite saved states and they would never get restored. */ - r300_flush_depth_textures(r300); + r300->zmask_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_render_target(r300->blitter, dst, rgba, dstx, dsty, width, height); r300_blitter_end(r300); + + r300->zmask_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } /* Clear a region of a depth stencil surface. */ @@ -302,83 +326,70 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, unsigned width, unsigned height) { struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; - /* Decompress zbuffers that are bound as textures. If we didn't flush here, - * it would happen inside the blitter when updating derived state, - * causing a blitter operation to be called from inside the blitter, - * which would overwrite saved states and they would never get restored. 
*/ - r300_flush_depth_textures(r300); + if (r300->zmask_in_use && !r300->zmask_locked) { + if (fb->zsbuf->texture == dst->texture) { + r300_decompress_zmask(r300); + } else { + r300->zmask_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); r300_blitter_end(r300); + + if (r300->zmask_locked) { + r300->zmask_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } } -/* Flush a depth stencil buffer. */ -static void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned level, - unsigned layer) +void r300_decompress_zmask(struct r300_context *r300) { - struct r300_context *r300 = r300_context(pipe); - struct pipe_surface *dstsurf, surf_tmpl; - struct r300_texture *tex = r300_texture(dst); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; - if (!tex->zmask_mem[level]) - return; - if (!tex->zmask_in_use[level]) + if (!r300->zmask_in_use || r300->zmask_locked) return; - surf_tmpl.format = dst->format; - surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - surf_tmpl.u.tex.level = level; - surf_tmpl.u.tex.first_layer = layer; - surf_tmpl.u.tex.last_layer = layer; - dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl); - - r300->z_decomp_rd = TRUE; + r300->zmask_decompress = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); - r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_begin(r300, R300_CLEAR); + util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0, + r300->dsa_decompress_zmask); r300_blitter_end(r300); - r300->z_decomp_rd = FALSE; - tex->zmask_in_use[level] = FALSE; - pipe_surface_reference(&dstsurf, NULL); + r300->zmask_decompress = FALSE; + r300->zmask_in_use = FALSE; + r300_mark_atom_dirty(r300, 
&r300->hyperz_state); } -/* We can't use compressed zbuffers as samplers. */ -void r300_flush_depth_textures(struct r300_context *r300) +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) { - struct r300_textures_state *state = - (struct r300_textures_state*)r300->textures_state.state; - unsigned i, level; - unsigned count = MIN2(state->sampler_view_count, - state->sampler_state_count); - - if (r300->z_decomp_rd) - return; - - for (i = 0; i < count; i++) - if (state->sampler_views[i] && state->sampler_states[i]) { - struct pipe_resource *tex = state->sampler_views[i]->base.texture; - - if (tex->target == PIPE_TEXTURE_3D || - tex->target == PIPE_TEXTURE_CUBE) - continue; + struct pipe_framebuffer_state fb = {0}; + fb.width = r300->locked_zbuffer->width; + fb.height = r300->locked_zbuffer->height; + fb.nr_cbufs = 0; + fb.zsbuf = r300->locked_zbuffer; + + r300->context.set_framebuffer_state(&r300->context, &fb); + r300_decompress_zmask(r300); +} - /* Ignore non-depth textures. - * Also ignore reinterpreted depth textures, e.g. resource_copy. */ - if (!util_format_is_depth_or_stencil(tex->format)) - continue; +void r300_decompress_zmask_locked(struct r300_context *r300) +{ + struct pipe_framebuffer_state saved_fb = {0}; - for (level = 0; level <= tex->last_level; level++) - if (r300_texture(tex)->zmask_in_use[level]) { - /* We don't handle 3D textures and cubemaps yet. */ - r300_flush_depth_stencil(&r300->context, tex, level, 0); - } - } + util_copy_framebuffer_state(&saved_fb, r300->fb_state.state); + r300_decompress_zmask_locked_unsafe(r300); + r300->context.set_framebuffer_state(&r300->context, &saved_fb); + util_unreference_framebuffer_state(&saved_fb); } /* Copy a block of pixels from one surface to another using HW. 
*/ @@ -393,8 +404,6 @@ static void r300_hw_copy_region(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_begin(r300, R300_COPY); - - /* Do a copy */ util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box, TRUE); r300_blitter_end(r300); @@ -409,10 +418,22 @@ static void r300_resource_copy_region(struct pipe_context *pipe, unsigned src_level, const struct pipe_box *src_box) { + struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; const struct util_format_description *desc = util_format_description(old_format); - boolean is_depth; + + if (r300->zmask_in_use && !r300->zmask_locked) { + if (fb->zsbuf->texture == src || + fb->zsbuf->texture == dst) { + r300_decompress_zmask(r300); + } else { + r300->zmask_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || (!pipe->screen->is_format_supported(pipe->screen, @@ -441,11 +462,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - if (is_depth) { - r300_flush_depth_stencil(pipe, src, src_level, src_box->z); - } - if (old_format != new_format) { r300_texture_reinterpret_format(pipe->screen, dst, new_format); @@ -462,6 +478,11 @@ static void r300_resource_copy_region(struct pipe_context *pipe, r300_texture_reinterpret_format(pipe->screen, src, old_format); } + + if (r300->zmask_locked) { + r300->zmask_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } } void r300_init_blit_functions(struct r300_context *r300) diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 2b183f62c5..15dc6d09ee 100644 --- 
a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -424,5 +424,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) } caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; + /* XXX The 8x8 compression mode doesn't always work (piglit/fbo-depth fails). */ + caps->z_compress = /*caps->is_rv350 ? R300_ZCOMP_8X8 :*/ R300_ZCOMP_4X4; caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index f2035d2009..0be161fa07 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -33,6 +33,13 @@ #define PIPE_ZMASK_SIZE 4096 #define RV3xx_ZMASK_SIZE 5120 +/* The size of a compressed tile. Each compressed tile takes 2 bits + * in the ZMASK RAM, so there is always 16 tiles per one dword. */ +enum r300_zmask_compression { + R300_ZCOMP_4X4 = 4, + R300_ZCOMP_8X8 = 8 +}; + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -50,10 +57,12 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + /* Some chipsets do not have HiZ RAM - other have varying amounts. */ int hiz_ram; - /* some chipsets have zmask ram per pipe some don't */ + /* Some chipsets have zmask ram per pipe some don't. */ int zmask_ram; + /* Compression mode for ZMASK. */ + enum r300_zmask_compression z_compress; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. 
The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index e265bdbd3b..552df2b476 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -94,6 +94,9 @@ static void r300_release_referenced_objects(struct r300_context *r300) remove_from_list(query); FREE(query); } + + r300->context.delete_depth_stencil_alpha_state(&r300->context, + r300->dsa_decompress_zmask); } static void r300_destroy_context(struct pipe_context* context) @@ -116,9 +119,6 @@ static void r300_destroy_context(struct pipe_context* context) /* XXX: This function assumes r300->query_list was initialized */ r300_release_referenced_objects(r300); - if (r300->zmask_mm) - r300_hyperz_destroy_mm(r300); - if (r300->cs) r300->rws->cs_destroy(r300->cs); @@ -238,7 +238,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) if (has_hiz_ram) R300_INIT_ATOM(hiz_clear, 0); /* zmask clear */ - R300_INIT_ATOM(zmask_clear, 0); + R300_INIT_ATOM(zmask_clear, 4); } /* ZB (unpipelined), SU. 
*/ R300_INIT_ATOM(query_start, 4); @@ -513,6 +513,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->dummy_vb = screen->resource_create(screen, &vb); } + { + struct pipe_depth_stencil_alpha_state dsa = {}; + dsa.depth.writemask = 1; + + r300->dsa_decompress_zmask = + r300->context.create_depth_stencil_alpha_state(&r300->context, + &dsa); + } + return &r300->context; fail: diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 9030f1bb98..6e96ae85ff 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -406,8 +406,6 @@ struct r300_texture { /* hyper-z memory allocs */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; - struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; - boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. @@ -589,15 +587,21 @@ struct r300_context { boolean two_sided_color; /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; -#define R300_Z_COMPRESS_44 1 -#define RV350_Z_COMPRESS_88 2 - int z_compression; + boolean cbzb_clear; - boolean z_decomp_rd; + /* Whether ZMASK is enabled. */ + boolean zmask_in_use; + /* Whether ZMASK is being decompressed. */ + boolean zmask_decompress; + /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */ + boolean zmask_locked; + /* The zbuffer the ZMASK of which is locked. 
*/ + struct pipe_surface *locked_zbuffer; + + void *dsa_decompress_zmask; /* two mem block managers for hiz/zmask ram space */ struct mem_block *hiz_mm; - struct mem_block *zmask_mm; /* upload managers */ struct u_upload_mgr *upload_vb; @@ -687,7 +691,9 @@ void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); /* r300_blit.c */ -void r300_flush_depth_textures(struct r300_context *r300); +void r300_decompress_zmask(struct r300_context *r300); +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); +void r300_decompress_zmask_locked(struct r300_context *r300); /* r300_query.c */ void r300_resume_query(struct r300_context *r300, @@ -713,8 +719,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG, - R300_CHANGED_ZCLEAR_FLAG, + R300_CHANGED_HYPERZ_FLAG, R300_CHANGED_MULTIWRITE }; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index d14cdcbbaf..54e263436b 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -433,6 +433,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) tex = r300_texture(surf->base.texture); surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ if (r300->screen->caps.hiz_ram) { if (tex->hiz_mem[level]) { @@ -443,14 +444,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); } } + /* Z Mask RAM. 
(compressed zbuffer) */ - if (tex->zmask_mem[level]) { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); - } + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); } } @@ -462,6 +459,7 @@ void r300_emit_hyperz_state(struct r300_context *r300, { struct r300_hyperz_state *z = state; CS_LOCALS(r300); + if (z->flush) WRITE_CS_TABLE(&z->cb_flush_begin, size); else @@ -1097,17 +1095,6 @@ static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint1 END_CS; } -static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) @@ -1153,42 +1140,49 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0; - struct r300_texture* tex; - uint32_t i, height; - int mult, offset_shift; - - tex = r300_texture(fb->zsbuf->texture); - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; - - offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs; + struct r300_texture *tex; + unsigned numdw, pipes; + unsigned compsize = r300->screen->caps.z_compress; + /* The tile size of 1 DWORD is: + * + * GPU Pipes 4x4 mode 8x8 mode + * ------------------------------------------ + * R580 4P/1Z 32x32 64x64 + * RV570 3P/1Z 48x16 96x32 + * RV530 1P/2Z 32x16 64x32 + */ + static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; + 
CS_LOCALS(r300); - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; + if (r300->screen->caps.family == CHIP_FAMILY_RV530) { + pipes = r300->screen->caps.num_z_pipes; + } else { + pipes = r300->screen->caps.num_frag_pipes; + } - height = ALIGN_DIVUP(fb->zsbuf->height, mult); + tex = r300_texture(fb->zsbuf->texture); - offset_shift = 4; - offset_shift += (r300screen->caps.num_frag_pipes / 2); - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* Get the zbuffer size (with the aligned width and height). */ + numdw = align(tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level], + num_blocks_x_per_dw[pipes-1] * compsize) * + align(fb->zsbuf->height, + num_blocks_y_per_dw[pipes-1] * compsize); - /* okay have width in pixels - divide by block width */ - stride = ALIGN_DIVUP(stride, mult); - /* have width in blocks - divide by number of fragment pipes screen width */ - /* 16 blocks per dword */ - stride = ALIGN_DIVUP(stride, 16); + /* Convert pixels -> dwords. */ + numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * + num_blocks_y_per_dw[pipes-1] * compsize); - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); - } + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(0); + OUT_CS(numdw); + OUT_CS(0); + END_CS; /* Mark the current zbuffer's zmask as in use. 
*/ - tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->zmask_in_use = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_ztop_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index c22e307c67..d996d19175 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -127,7 +127,7 @@ static boolean r300_can_hiz(struct r300_context *r300) z->current_func, dsa_state->z_stencil_control); return FALSE; } - } + } return TRUE; } @@ -139,7 +139,6 @@ static void r300_update_hyperz(struct r300_context* r300) (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_texture *zstex = fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; - boolean zmask_in_use = FALSE; boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; @@ -158,42 +157,40 @@ static void r300_update_hyperz(struct r300_context* r300) if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level]; hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; - /* Z fastfill. */ - if (zmask_in_use) { - z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ - } - /* Zbuffer compression. */ - if (zmask_in_use && r300->z_compression) { - z->zb_bw_cntl |= R300_RD_COMP_ENABLE; - if (r300->z_decomp_rd == false) + if (r300->zmask_in_use && !r300->zmask_locked) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | + /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ + R300_RD_COMP_ENABLE; + + if (!r300->zmask_decompress) { z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + } + + if (r300->screen->caps.z_compress == R300_ZCOMP_8X8) { + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + } } - /* RV350 and up optimizations. */ - /* The section 10.4.9 in the docs is a lie. 
*/ - if (r300->z_compression == RV350_Z_COMPRESS_88) - z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; - - if (hiz_in_use) { - bool can_hiz = r300_can_hiz(r300); - if (can_hiz) { - z->zb_bw_cntl |= R300_HIZ_ENABLE; - z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; - z->sc_hyperz |= r300_get_sc_hz_max(r300); - z->zb_bw_cntl |= r300_get_hiz_min(r300); + + if (hiz_in_use && r300_can_hiz(r300)) { + z->zb_bw_cntl |= R300_HIZ_ENABLE | + r300_get_hiz_min(r300); + + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | + r300_get_sc_hz_max(r300); + + if (r300->screen->caps.is_r500) { + z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 | + R500_HIZ_EQUAL_REJECT_ENABLE; } } /* R500-specific features and optimizations. */ if (r300->screen->caps.is_r500) { - z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; - z->zb_bw_cntl |= - R500_HIZ_EQUAL_REJECT_ENABLE | - R500_PEQ_PACKING_ENABLE | - R500_COVERED_PTR_MASKING_ENABLE; + z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; } } @@ -297,26 +294,10 @@ static void r300_update_hiz_clear(struct r300_context *r300) r300->hiz_clear.size = height * 4; } -static void r300_update_zmask_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - int mult; - - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; - - height = ALIGN_DIVUP(fb->zsbuf->height, mult); - - r300->zmask_clear.size = height * 4; -} - void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); + if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } @@ -324,9 +305,6 @@ void r300_update_hyperz_state(struct r300_context* r300) if (r300->hiz_clear.dirty) { r300_update_hiz_clear(r300); } - if (r300->zmask_clear.dirty) { - r300_update_zmask_clear(r300); - } } void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) @@ -345,43 +323,6 @@ void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface 
*surf) ndw = ALIGN_DIVUP(zsize, 64); tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); - return; -} - -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress) -{ - int bsize = 256; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - struct r300_texture *tex; - - tex = r300_texture(surf->base.texture); - - /* We currently don't handle decompression for 3D textures and cubemaps - * correctly. */ - if (tex->desc.b.b.target != PIPE_TEXTURE_1D && - tex->desc.b.b.target != PIPE_TEXTURE_2D && - tex->desc.b.b.target != PIPE_TEXTURE_RECT) - return; - - /* Cannot flush zmask of 16-bit zbuffers. */ - if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16) - return; - - if (tex->zmask_mem[level]) - return; - - zsize = tex->desc.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->desc.b.b.format); - - /* each zmask dword represents 16 4x4 blocks - which is 256 pixels - or 16 8x8 depending on the gb peq flag = 1024 pixels */ - if (compress == RV350_Z_COMPRESS_88) - bsize = 1024; - - ndw = ALIGN_DIVUP(zsize, bsize); - tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); - return; } boolean r300_hyperz_init_mm(struct r300_context *r300) @@ -389,15 +330,9 @@ boolean r300_hyperz_init_mm(struct r300_context *r300) struct r300_screen* r300screen = r300->screen; int frag_pipes = r300screen->caps.num_frag_pipes; - r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); - if (!r300->zmask_mm) - return FALSE; - if (r300screen->caps.hiz_ram) { r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); if (!r300->hiz_mm) { - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; return FALSE; } } @@ -413,7 +348,4 @@ void r300_hyperz_destroy_mm(struct r300_context *r300) u_mmDestroy(r300->hiz_mm); r300->hiz_mm = NULL; } - - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h 
index 30a23ec649..d4c8e7c60a 100644 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -28,8 +28,8 @@ struct r300_context; void r300_update_hyperz_state(struct r300_context* r300); void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); boolean r300_hyperz_init_mm(struct r300_context *r300); void r300_hyperz_destroy_mm(struct r300_context *r300); + #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 2664c1dc83..ba456d413f 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -694,8 +694,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, } if (change == R300_CHANGED_FB_STATE || - change == R300_CHANGED_CBZB_FLAG || - change == R300_CHANGED_ZCLEAR_FLAG) { + change == R300_CHANGED_HYPERZ_FLAG) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } @@ -719,8 +718,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, } static void - r300_set_framebuffer_state(struct pipe_context* pipe, - const struct pipe_framebuffer_state* state) +r300_set_framebuffer_state(struct pipe_context* pipe, + const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; @@ -728,7 +727,6 @@ static void boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; - int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -744,6 +742,32 @@ static void return; } + if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) { + /* There is a zmask in use, what are we gonna do? 
*/ + if (state->zsbuf) { + if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { + /* Decompress the currently bound zbuffer before we bind another one. */ + r300_decompress_zmask(r300); + } + } else { + /* We don't bind another zbuffer, so lock the current one. */ + r300->zmask_locked = TRUE; + pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); + } + } else if (r300->zmask_locked && r300->locked_zbuffer) { + /* We have a locked zbuffer now, what are we gonna do? */ + if (state->zsbuf) { + if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { + /* We are binding some other zbuffer, so decompress the locked one, + * it gets unlocked automatically. */ + r300_decompress_zmask_locked_unsafe(r300); + } else { + /* We are binding the locked zbuffer again, so unlock it. */ + r300->zmask_locked = FALSE; + } + } + } + /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { r300_mark_atom_dirty(r300, &r300->blend_state); @@ -758,14 +782,15 @@ static void util_copy_framebuffer_state(r300->fb_state.state, state); + if (!r300->zmask_locked) { + pipe_surface_reference(&r300->locked_zbuffer, NULL); + } + r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); r300->validate_buffers = TRUE; - r300->z_compression = false; - if (state->zsbuf) { - blocksize = util_format_get_blocksize(state->zsbuf->texture->format); - switch (blocksize) { + switch (util_format_get_blocksize(state->zsbuf->texture->format)) { case 2: zbuffer_bpp = 16; break; @@ -773,30 +798,19 @@ static void zbuffer_bpp = 24; break; } + + /* Setup Hyper-Z. */ if (can_hyperz) { struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_texture *tex; - int compress = r300->screen->caps.is_rv350 ? 
RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; + struct r300_texture *tex = r300_texture(zs_surf->base.texture); int level = zs_surf->base.u.tex.level; - tex = r300_texture(zs_surf->base.texture); - /* work out whether we can support hiz on this buffer */ r300_hiz_alloc_block(r300, zs_surf); - - /* work out whether we can support zmask features on this buffer */ - r300_zmask_alloc_block(r300, zs_surf, compress); - - if (tex->zmask_mem[level]) { - /* compression causes hangs on 16-bit */ - if (zbuffer_bpp == 24) - r300->z_compression = compress; - } + DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, - tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, - r300->z_compression, tex->zmask_mem[level] ? 1 : 0, - tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); + "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, + tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef); } /* Polygon offset depends on the zbuffer bit depth. 
*/ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 95be7849f8..de4c271328 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -862,11 +862,35 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } -void r300_update_derived_state(struct r300_context* r300) +static void r300_decompress_depth_textures(struct r300_context *r300) { - r300_flush_depth_textures(r300); + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + struct pipe_resource *tex; + unsigned count = MIN2(state->sampler_view_count, + state->sampler_state_count); + unsigned i; + + if (!r300->zmask_locked || !r300->locked_zbuffer) { + return; + } + + for (i = 0; i < count; i++) { + if (state->sampler_views[i] && state->sampler_states[i]) { + tex = state->sampler_views[i]->base.texture; + if (tex == r300->locked_zbuffer->texture) { + r300_decompress_zmask_locked(r300); + return; + } + } + } +} + +void r300_update_derived_state(struct r300_context* r300) +{ if (r300->textures_state.dirty) { + r300_decompress_depth_textures(r300); r300_merge_textures_and_samplers(r300); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index a5fbe855e7..ca2762809d 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -743,8 +743,6 @@ static void r300_texture_destroy(struct pipe_screen *screen, for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { if (tex->hiz_mem[i]) u_mmFreeMem(tex->hiz_mem[i]); - if (tex->zmask_mem[i]) - u_mmFreeMem(tex->zmask_mem[i]); } FREE(tex); -- cgit v1.2.3