From db299a9f8244d53d9041fcdbd396a77ebe1f9e3e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 25 Jan 2011 05:37:52 +0100 Subject: r300g: fix some bugs with zbuffer compression (v4) This drops the memblock manager for ZMASK. Instead, only one zbuffer can be compressed at a time. Note that this does not necessarily have to be slower. When there is a large number of zbuffers, compression might be used more often than it was before. It's also easier to debug. How it works: 1) 'clear' turns the compression on. 2) If some other zbuffer is set or the currently-bound zbuffer is used for texturing, the driver decompresses it and then turns the compression off. Notes: - The ZMASK clear has been refactored, so that only one packet3 is used to clear ZMASK. - The 8x8 compression mode is disabled. I couldn't make it work without issues. - Also removed driver-specific stuff from u_blitter. Driver status: - RV530 and R580 appear to just work (finally). - RV570 should work, but there may be an issue that we don't correctly calculate the number of dwords to clear, resulting in a partially uninitialized zbuffer. - RS690 misrenders as if no ZMASK clear happened. No idea what's going on. - RV350 may even hardlock. This issue was already present and this patch doesn't fix it. I think we are still missing some hardware info we need to make the zbuffer compression work fully. Note that there is also an issue with HiZ, resulting in a sort of blocky zigzagged corruption around some objects. --- src/gallium/drivers/r300/r300_emit.c | 88 +++++++++++++++++------------------- 1 file changed, 41 insertions(+), 47 deletions(-) (limited to 'src/gallium/drivers/r300/r300_emit.c') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index d14cdcbbaf..54e263436b 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -433,6 +433,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) tex = r300_texture(surf->base.texture); surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ if (r300->screen->caps.hiz_ram) { if (tex->hiz_mem[level]) { @@ -443,14 +444,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); } } + /* Z Mask RAM. (compressed zbuffer) */ - if (tex->zmask_mem[level]) { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); - } + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); } } @@ -462,6 +459,7 @@ void r300_emit_hyperz_state(struct r300_context *r300, { struct r300_hyperz_state *z = state; CS_LOCALS(r300); + if (z->flush) WRITE_CS_TABLE(&z->cb_flush_begin, size); else @@ -1097,17 +1095,6 @@ static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint1 END_CS; } -static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) @@ -1153,42 +1140,49 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0; - struct r300_texture* tex; - uint32_t i, height; - int mult, offset_shift; - - tex = r300_texture(fb->zsbuf->texture); - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; - - offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs; + struct r300_texture *tex; + unsigned numdw, pipes; + unsigned compsize = r300->screen->caps.z_compress; + /* The tile size of 1 DWORD is: + * + * GPU Pipes 4x4 mode 8x8 mode + * ------------------------------------------ + * R580 4P/1Z 32x32 64x64 + * RV570 3P/1Z 48x16 96x32 + * RV530 1P/2Z 32x16 64x32 + */ + static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; + CS_LOCALS(r300); - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; + if (r300->screen->caps.family == CHIP_FAMILY_RV530) { + pipes = r300->screen->caps.num_z_pipes; + } else { + pipes = r300->screen->caps.num_frag_pipes; + } - height = ALIGN_DIVUP(fb->zsbuf->height, mult); + tex = r300_texture(fb->zsbuf->texture); - offset_shift = 4; - offset_shift += (r300screen->caps.num_frag_pipes / 2); - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* Get the zbuffer size (with the aligned width and height). */ + numdw = align(tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level], + num_blocks_x_per_dw[pipes-1] * compsize) * + align(fb->zsbuf->height, + num_blocks_y_per_dw[pipes-1] * compsize); - /* okay have width in pixels - divide by block width */ - stride = ALIGN_DIVUP(stride, mult); - /* have width in blocks - divide by number of fragment pipes screen width */ - /* 16 blocks per dword */ - stride = ALIGN_DIVUP(stride, 16); + /* Convert pixels -> dwords. */ + numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * + num_blocks_y_per_dw[pipes-1] * compsize); - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); - } + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(0); + OUT_CS(numdw); + OUT_CS(0); + END_CS; /* Mark the current zbuffer's zmask as in use. */ - tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->zmask_in_use = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_ztop_state(struct r300_context* r300, -- cgit v1.2.3