summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/r300
diff options
context:
space:
mode:
authorMarek Olšák <maraeo@gmail.com>2011-01-25 05:37:52 +0100
committerMarek Olšák <maraeo@gmail.com>2011-01-27 18:12:01 +0100
commitdb299a9f8244d53d9041fcdbd396a77ebe1f9e3e (patch)
tree60adc94f4a132101be84579bf57988283e20ef90 /src/gallium/drivers/r300
parent7a4345fd83605695dc641af503f6e87b808b48d7 (diff)
r300g: fix some bugs with zbuffer compression (v4)
This drops the memblock manager for ZMASK. Instead, only one zbuffer can be compressed at a time. Note that this does not necessarily have to be slower. When there is a large number of zbuffers, compression might be used more often than it was before. It's also easier to debug.

How it works:
1) 'clear' turns the compression on.
2) If some other zbuffer is set or the currently-bound zbuffer is used for texturing, the driver decompresses it and then turns the compression off.

Notes:
- The ZMASK clear has been refactored, so that only one packet3 is used to clear ZMASK.
- The 8x8 compression mode is disabled. I couldn't make it work without issues.
- Also removed driver-specific stuff from u_blitter.

Driver status:
- RV530 and R580 appear to just work (finally).
- RV570 should work, but there may be an issue that we don't correctly calculate the number of dwords to clear, resulting in a partially uninitialized zbuffer.
- RS690 misrenders as if no ZMASK clear happened. No idea what's going on.
- RV350 may even hardlock. This issue was already present and this patch doesn't fix it.

I think we are still missing some hardware info we need to make the zbuffer compression work fully.

Note that there is also an issue with HiZ, resulting in a sort of blocky zigzagged corruption around some objects.
Diffstat (limited to 'src/gallium/drivers/r300')
-rw-r--r--src/gallium/drivers/r300/r300_blit.c227
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c2
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h13
-rw-r--r--src/gallium/drivers/r300/r300_context.c17
-rw-r--r--src/gallium/drivers/r300/r300_context.h25
-rw-r--r--src/gallium/drivers/r300/r300_emit.c88
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c120
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.h2
-rw-r--r--src/gallium/drivers/r300/r300_state.c66
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c28
-rw-r--r--src/gallium/drivers/r300/r300_texture.c2
11 files changed, 299 insertions, 291 deletions
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index e195128d26..f24d5582e1 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -58,8 +58,9 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o
util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count,
r300->vertex_buffer);
- if (op & (R300_CLEAR_SURFACE | R300_COPY))
+ if (op & (R300_CLEAR_SURFACE | R300_COPY)) {
util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
+ }
if (op & R300_COPY) {
struct r300_textures_state* state =
@@ -108,6 +109,23 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
return r300_surface(fb->cbufs[0])->cbzb_allowed;
}
+static boolean r300_fast_zclear_allowed(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+ /* Cannot decompress zmask with a 16-bit zbuffer.
+ * Also, compression causes a hang. */
+ if (util_format_get_blocksizebits(fb->zsbuf->texture->format) == 16)
+ return FALSE;
+
+ /* Cannot use compression with a linear zbuffer. */
+ if (!r300_texture(fb->zsbuf->texture)->desc.microtile)
+ return FALSE;
+
+ return TRUE;
+}
+
static uint32_t r300_depth_clear_value(enum pipe_format format,
double depth, unsigned stencil)
{
@@ -132,37 +150,46 @@ static void r300_clear(struct pipe_context* pipe,
double depth,
unsigned stencil)
{
- /* My notes about fastfill:
+ /* My notes about Zbuffer compression:
*
- * 1) Only the zbuffer is cleared.
+ * 1) The zbuffer must be micro-tiled and whole microtiles must be
+ * written if compression is enabled. If microtiling is disabled,
+ * it locks up.
*
- * 2) The zbuffer must be micro-tiled and whole microtiles must be
- * written. If microtiling is disabled, it locks up.
+ * 2) There is ZMASK RAM which contains a compressed zbuffer.
+ * Each dword of the Z Mask contains compression information
+ * for 16 4x4 pixel tiles, that is 2 bits for each tile.
+ * On chips with 2 Z pipes, every other dword maps to a different
+ * pipe. On newer chipsets, there is a new compression mode
+ * with 8x8 pixel tiles per 2 bits.
*
- * 3) There is Z Mask RAM which contains a compressed zbuffer and
- * it interacts with fastfill. We should figure out how to use it
- * to get more performance.
- * This is what we know about the Z Mask:
+ * 3) The FASTFILL bit has nothing to do with filling. It only tells hw
+ * it should look in the ZMASK RAM first before fetching from a real
+ * zbuffer.
*
- * Each dword of the Z Mask contains compression information
- * for 16 4x4 pixel blocks, that is 2 bits for each block.
- * On chips with 2 Z pipes, every other dword maps to a different
- * pipe.
+ * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned
+ * during zbuffer reads instead of the value that is actually stored
+ * in the zbuffer memory. A pixel is in a cleared state when its ZMASK
+ * is equal to 0. Therefore, if you clear ZMASK with zeros, you may
+ * leave the zbuffer memory uninitialized, but then you must enable
+ * compression, so that the ZMASK RAM is actually used.
*
- * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must
- * be equal to 0. (clear the Z Mask RAM with zeros)
+ * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed
+ * during zbuffer updates. A special decompressing operation should be
+ * used to fully decompress a zbuffer, which basically just stores all
+ * compressed tiles in ZMASK to the zbuffer memory.
*
- * 5) For 16-bit zbuffer, compression causes a hung with one or
+ * 6) For a 16-bit zbuffer, compression causes a hang with one or
* two samples and should not be used.
*
- * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
+ * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
* to avoid needless decompression.
*
- * 7) Fastfill must not be used if reading of compressed Z data is disabled
+ * 8) Fastfill must not be used if reading of compressed Z data is disabled
* and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
* i.e. it cannot be used to compress the zbuffer.
*
- * 8) ZB_CB_CLEAR does not interact with fastfill in any way.
+ * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way.
*
* - Marek
*/
@@ -179,25 +206,23 @@ static void r300_clear(struct pipe_context* pipe,
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
- /* Decompress zbuffers that are bound as textures. If we didn't flush here,
- * it would happen inside the blitter when updating derived state,
- * causing a blitter operation to be called from inside the blitter,
- * which would overwrite saved states and they would never get restored. */
- r300_flush_depth_textures(r300);
-
/* Enable fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
hyperz_dcv = hyperz->zb_depthclearvalue =
r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
- r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG);
- if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) {
+ if (r300_fast_zclear_allowed(r300)) {
r300_mark_atom_dirty(r300, &r300->zmask_clear);
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
+
if (zstex->hiz_mem[fb->zsbuf->u.tex.level])
r300_mark_atom_dirty(r300, &r300->hiz_clear);
+
+ /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ * once hiz offset is constant. */
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
/* Enable CBZB clear. */
@@ -211,7 +236,7 @@ static void r300_clear(struct pipe_context* pipe,
height = surf->cbzb_height;
r300->cbzb_clear = TRUE;
- r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
/* Clear. */
@@ -225,7 +250,7 @@ static void r300_clear(struct pipe_context* pipe,
buffers, rgba, depth, stencil);
r300_blitter_end(r300);
} else if (r300->zmask_clear.dirty) {
- /* Just clear zmask and hiz now, this does not use a standard draw
+ /* Just clear zmask and hiz now, this does not use the standard draw
* procedure. */
unsigned dwords;
@@ -257,16 +282,15 @@ static void r300_clear(struct pipe_context* pipe,
if (r300->cbzb_clear) {
r300->cbzb_clear = FALSE;
hyperz->zb_depthclearvalue = hyperz_dcv;
- r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
/* Enable fastfill and/or hiz.
*
* If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
* looks if zmask/hiz is in use and enables fastfill accordingly. */
- if (zstex &&
- (zstex->zmask_in_use[fb->zsbuf->u.tex.level] ||
- zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
+ if (r300->zmask_in_use ||
+ (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
@@ -280,16 +304,16 @@ static void r300_clear_render_target(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
- /* Decompress zbuffers that are bound as textures. If we didn't flush here,
- * it would happen inside the blitter when updating derived state,
- * causing a blitter operation to be called from inside the blitter,
- * which would overwrite saved states and they would never get restored. */
- r300_flush_depth_textures(r300);
+ r300->zmask_locked = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_render_target(r300->blitter, dst, rgba,
dstx, dsty, width, height);
r300_blitter_end(r300);
+
+ r300->zmask_locked = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
/* Clear a region of a depth stencil surface. */
@@ -302,83 +326,70 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
unsigned width, unsigned height)
{
struct r300_context *r300 = r300_context(pipe);
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
- /* Decompress zbuffers that are bound as textures. If we didn't flush here,
- * it would happen inside the blitter when updating derived state,
- * causing a blitter operation to be called from inside the blitter,
- * which would overwrite saved states and they would never get restored. */
- r300_flush_depth_textures(r300);
+ if (r300->zmask_in_use && !r300->zmask_locked) {
+ if (fb->zsbuf->texture == dst->texture) {
+ r300_decompress_zmask(r300);
+ } else {
+ r300->zmask_locked = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
+ }
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil,
dstx, dsty, width, height);
r300_blitter_end(r300);
+
+ if (r300->zmask_locked) {
+ r300->zmask_locked = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
}
-/* Flush a depth stencil buffer. */
-static void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- unsigned level,
- unsigned layer)
+void r300_decompress_zmask(struct r300_context *r300)
{
- struct r300_context *r300 = r300_context(pipe);
- struct pipe_surface *dstsurf, surf_tmpl;
- struct r300_texture *tex = r300_texture(dst);
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
- if (!tex->zmask_mem[level])
- return;
- if (!tex->zmask_in_use[level])
+ if (!r300->zmask_in_use || r300->zmask_locked)
return;
- surf_tmpl.format = dst->format;
- surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
- surf_tmpl.u.tex.level = level;
- surf_tmpl.u.tex.first_layer = layer;
- surf_tmpl.u.tex.last_layer = layer;
- dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl);
-
- r300->z_decomp_rd = TRUE;
+ r300->zmask_decompress = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
- r300_blitter_begin(r300, R300_CLEAR_SURFACE);
- util_blitter_flush_depth_stencil(r300->blitter, dstsurf);
+ r300_blitter_begin(r300, R300_CLEAR);
+ util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0,
+ r300->dsa_decompress_zmask);
r300_blitter_end(r300);
- r300->z_decomp_rd = FALSE;
- tex->zmask_in_use[level] = FALSE;
- pipe_surface_reference(&dstsurf, NULL);
+ r300->zmask_decompress = FALSE;
+ r300->zmask_in_use = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
-/* We can't use compressed zbuffers as samplers. */
-void r300_flush_depth_textures(struct r300_context *r300)
+void r300_decompress_zmask_locked_unsafe(struct r300_context *r300)
{
- struct r300_textures_state *state =
- (struct r300_textures_state*)r300->textures_state.state;
- unsigned i, level;
- unsigned count = MIN2(state->sampler_view_count,
- state->sampler_state_count);
-
- if (r300->z_decomp_rd)
- return;
-
- for (i = 0; i < count; i++)
- if (state->sampler_views[i] && state->sampler_states[i]) {
- struct pipe_resource *tex = state->sampler_views[i]->base.texture;
-
- if (tex->target == PIPE_TEXTURE_3D ||
- tex->target == PIPE_TEXTURE_CUBE)
- continue;
+ struct pipe_framebuffer_state fb = {0};
+ fb.width = r300->locked_zbuffer->width;
+ fb.height = r300->locked_zbuffer->height;
+ fb.nr_cbufs = 0;
+ fb.zsbuf = r300->locked_zbuffer;
+
+ r300->context.set_framebuffer_state(&r300->context, &fb);
+ r300_decompress_zmask(r300);
+}
- /* Ignore non-depth textures.
- * Also ignore reinterpreted depth textures, e.g. resource_copy. */
- if (!util_format_is_depth_or_stencil(tex->format))
- continue;
+void r300_decompress_zmask_locked(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state saved_fb = {0};
- for (level = 0; level <= tex->last_level; level++)
- if (r300_texture(tex)->zmask_in_use[level]) {
- /* We don't handle 3D textures and cubemaps yet. */
- r300_flush_depth_stencil(&r300->context, tex, level, 0);
- }
- }
+ util_copy_framebuffer_state(&saved_fb, r300->fb_state.state);
+ r300_decompress_zmask_locked_unsafe(r300);
+ r300->context.set_framebuffer_state(&r300->context, &saved_fb);
+ util_unreference_framebuffer_state(&saved_fb);
}
/* Copy a block of pixels from one surface to another using HW. */
@@ -393,8 +404,6 @@ static void r300_hw_copy_region(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
r300_blitter_begin(r300, R300_COPY);
-
- /* Do a copy */
util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box, TRUE);
r300_blitter_end(r300);
@@ -409,10 +418,22 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
unsigned src_level,
const struct pipe_box *src_box)
{
+ struct r300_context *r300 = r300_context(pipe);
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
enum pipe_format old_format = dst->format;
enum pipe_format new_format = old_format;
const struct util_format_description *desc = util_format_description(old_format);
- boolean is_depth;
+
+ if (r300->zmask_in_use && !r300->zmask_locked) {
+ if (fb->zsbuf->texture == src ||
+ fb->zsbuf->texture == dst) {
+ r300_decompress_zmask(r300);
+ } else {
+ r300->zmask_locked = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
+ }
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
(!pipe->screen->is_format_supported(pipe->screen,
@@ -441,11 +462,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
}
}
- is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
- if (is_depth) {
- r300_flush_depth_stencil(pipe, src, src_level, src_box->z);
- }
-
if (old_format != new_format) {
r300_texture_reinterpret_format(pipe->screen,
dst, new_format);
@@ -462,6 +478,11 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
r300_texture_reinterpret_format(pipe->screen,
src, old_format);
}
+
+ if (r300->zmask_locked) {
+ r300->zmask_locked = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
}
void r300_init_blit_functions(struct r300_context *r300)
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 2b183f62c5..15dc6d09ee 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -424,5 +424,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
}
caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350;
+ /* XXX The 8x8 compression mode doesn't always work (piglit/fbo-depth fails). */
+ caps->z_compress = /*caps->is_rv350 ? R300_ZCOMP_8X8 :*/ R300_ZCOMP_4X4;
caps->dxtc_swizzle = caps->is_r400 || caps->is_r500;
}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index f2035d2009..0be161fa07 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -33,6 +33,13 @@
#define PIPE_ZMASK_SIZE 4096
#define RV3xx_ZMASK_SIZE 5120
+/* The size of a compressed tile. Each compressed tile takes 2 bits
+ * in the ZMASK RAM, so there are always 16 tiles per dword. */
+enum r300_zmask_compression {
+ R300_ZCOMP_4X4 = 4,
+ R300_ZCOMP_8X8 = 8
+};
+
/* Structure containing all the possible information about a specific Radeon
* in the R3xx, R4xx, and R5xx families. */
struct r300_capabilities {
@@ -50,10 +57,12 @@ struct r300_capabilities {
unsigned num_tex_units;
/* Whether or not TCL is physically present */
boolean has_tcl;
- /* Some chipsets do not have HiZ RAM - other have varying amounts . */
+ /* Some chipsets do not have HiZ RAM - other have varying amounts. */
int hiz_ram;
- /* some chipsets have zmask ram per pipe some don't */
+ /* Some chipsets have zmask ram per pipe, some don't. */
int zmask_ram;
+ /* Compression mode for ZMASK. */
+ enum r300_zmask_compression z_compress;
/* Whether or not this is RV350 or newer, including all r400 and r500
* chipsets. The differences compared to the oldest r300 chips are:
* - Blend LTE/GTE thresholds
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index e265bdbd3b..552df2b476 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -94,6 +94,9 @@ static void r300_release_referenced_objects(struct r300_context *r300)
remove_from_list(query);
FREE(query);
}
+
+ r300->context.delete_depth_stencil_alpha_state(&r300->context,
+ r300->dsa_decompress_zmask);
}
static void r300_destroy_context(struct pipe_context* context)
@@ -116,9 +119,6 @@ static void r300_destroy_context(struct pipe_context* context)
/* XXX: This function assumes r300->query_list was initialized */
r300_release_referenced_objects(r300);
- if (r300->zmask_mm)
- r300_hyperz_destroy_mm(r300);
-
if (r300->cs)
r300->rws->cs_destroy(r300->cs);
@@ -238,7 +238,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
if (has_hiz_ram)
R300_INIT_ATOM(hiz_clear, 0);
/* zmask clear */
- R300_INIT_ATOM(zmask_clear, 0);
+ R300_INIT_ATOM(zmask_clear, 4);
}
/* ZB (unpipelined), SU. */
R300_INIT_ATOM(query_start, 4);
@@ -513,6 +513,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->dummy_vb = screen->resource_create(screen, &vb);
}
+ {
+ struct pipe_depth_stencil_alpha_state dsa = {};
+ dsa.depth.writemask = 1;
+
+ r300->dsa_decompress_zmask =
+ r300->context.create_depth_stencil_alpha_state(&r300->context,
+ &dsa);
+ }
+
return &r300->context;
fail:
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 9030f1bb98..6e96ae85ff 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -406,8 +406,6 @@ struct r300_texture {
/* hyper-z memory allocs */
struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS];
- struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS];
- boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS];
boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS];
/* This is the level tiling flags were last time set for.
@@ -589,15 +587,21 @@ struct r300_context {
boolean two_sided_color;
/* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
boolean incompatible_vb_layout;
-#define R300_Z_COMPRESS_44 1
-#define RV350_Z_COMPRESS_88 2
- int z_compression;
+
boolean cbzb_clear;
- boolean z_decomp_rd;
+ /* Whether ZMASK is enabled. */
+ boolean zmask_in_use;
+ /* Whether ZMASK is being decompressed. */
+ boolean zmask_decompress;
+ /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */
+ boolean zmask_locked;
+ /* The zbuffer the ZMASK of which is locked. */
+ struct pipe_surface *locked_zbuffer;
+
+ void *dsa_decompress_zmask;
/* two mem block managers for hiz/zmask ram space */
struct mem_block *hiz_mm;
- struct mem_block *zmask_mm;
/* upload managers */
struct u_upload_mgr *upload_vb;
@@ -687,7 +691,9 @@ void r300_init_state_functions(struct r300_context* r300);
void r300_init_resource_functions(struct r300_context* r300);
/* r300_blit.c */
-void r300_flush_depth_textures(struct r300_context *r300);
+void r300_decompress_zmask(struct r300_context *r300);
+void r300_decompress_zmask_locked_unsafe(struct r300_context *r300);
+void r300_decompress_zmask_locked(struct r300_context *r300);
/* r300_query.c */
void r300_resume_query(struct r300_context *r300,
@@ -713,8 +719,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias);
/* r300_state.c */
enum r300_fb_state_change {
R300_CHANGED_FB_STATE = 0,
- R300_CHANGED_CBZB_FLAG,
- R300_CHANGED_ZCLEAR_FLAG,
+ R300_CHANGED_HYPERZ_FLAG,
R300_CHANGED_MULTIWRITE
};
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index d14cdcbbaf..54e263436b 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -433,6 +433,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
tex = r300_texture(surf->base.texture);
surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK;
+
/* HiZ RAM. */
if (r300->screen->caps.hiz_ram) {
if (tex->hiz_mem[level]) {
@@ -443,14 +444,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_ZB_HIZ_PITCH, 0);
}
}
+
/* Z Mask RAM. (compressed zbuffer) */
- if (tex->zmask_mem[level]) {
- OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2);
- OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
- } else {
- OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
- OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0);
- }
+ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
+ OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
}
}
@@ -462,6 +459,7 @@ void r300_emit_hyperz_state(struct r300_context *r300,
{
struct r300_hyperz_state *z = state;
CS_LOCALS(r300);
+
if (z->flush)
WRITE_CS_TABLE(&z->cb_flush_begin, size);
else
@@ -1097,17 +1095,6 @@ static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint1
END_CS;
}
-static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val)
-{
- CS_LOCALS(r300);
- BEGIN_CS(4);
- OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
- OUT_CS(start);
- OUT_CS(count);
- OUT_CS(val);
- END_CS;
-}
-
#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
@@ -1153,42 +1140,49 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
{
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- struct r300_screen* r300screen = r300->screen;
- uint32_t stride, offset = 0;
- struct r300_texture* tex;
- uint32_t i, height;
- int mult, offset_shift;
-
- tex = r300_texture(fb->zsbuf->texture);
- stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level];
-
- offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs;
+ struct r300_texture *tex;
+ unsigned numdw, pipes;
+ unsigned compsize = r300->screen->caps.z_compress;
+ /* The tile size of 1 DWORD is:
+ *
+ * GPU Pipes 4x4 mode 8x8 mode
+ * ------------------------------------------
+ * R580 4P/1Z 32x32 64x64
+ * RV570 3P/1Z 48x16 96x32
+ * RV530 1P/2Z 32x16 64x32
+ */
+ static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8};
+ static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8};
+ CS_LOCALS(r300);
- if (r300->z_compression == RV350_Z_COMPRESS_88)
- mult = 8;
- else
- mult = 4;
+ if (r300->screen->caps.family == CHIP_FAMILY_RV530) {
+ pipes = r300->screen->caps.num_z_pipes;
+ } else {
+ pipes = r300->screen->caps.num_frag_pipes;
+ }
- height = ALIGN_DIVUP(fb->zsbuf->height, mult);
+ tex = r300_texture(fb->zsbuf->texture);
- offset_shift = 4;
- offset_shift += (r300screen->caps.num_frag_pipes / 2);
- stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes);
+ /* Get the zbuffer size (with the aligned width and height). */
+ numdw = align(tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level],
+ num_blocks_x_per_dw[pipes-1] * compsize) *
+ align(fb->zsbuf->height,
+ num_blocks_y_per_dw[pipes-1] * compsize);
- /* okay have width in pixels - divide by block width */
- stride = ALIGN_DIVUP(stride, mult);
- /* have width in blocks - divide by number of fragment pipes screen width */
- /* 16 blocks per dword */
- stride = ALIGN_DIVUP(stride, 16);
+ /* Convert pixels -> dwords. */
+ numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize *
+ num_blocks_y_per_dw[pipes-1] * compsize);
- for (i = 0; i < height; i++) {
- offset = i * stride;
- offset <<= offset_shift;
- r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff);
- }
+ BEGIN_CS(size);
+ OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
+ OUT_CS(0);
+ OUT_CS(numdw);
+ OUT_CS(0);
+ END_CS;
/* Mark the current zbuffer's zmask as in use. */
- tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE;
+ r300->zmask_in_use = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
void r300_emit_ztop_state(struct r300_context* r300,
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index c22e307c67..d996d19175 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -127,7 +127,7 @@ static boolean r300_can_hiz(struct r300_context *r300)
z->current_func, dsa_state->z_stencil_control);
return FALSE;
}
- }
+ }
return TRUE;
}
@@ -139,7 +139,6 @@ static void r300_update_hyperz(struct r300_context* r300)
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_texture *zstex =
fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
- boolean zmask_in_use = FALSE;
boolean hiz_in_use = FALSE;
z->gb_z_peq_config = 0;
@@ -158,42 +157,40 @@ static void r300_update_hyperz(struct r300_context* r300)
if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
return;
- zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level];
hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level];
- /* Z fastfill. */
- if (zmask_in_use) {
- z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/
- }
-
/* Zbuffer compression. */
- if (zmask_in_use && r300->z_compression) {
- z->zb_bw_cntl |= R300_RD_COMP_ENABLE;
- if (r300->z_decomp_rd == false)
+ if (r300->zmask_in_use && !r300->zmask_locked) {
+ z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
+ /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/
+ R300_RD_COMP_ENABLE;
+
+ if (!r300->zmask_decompress) {
z->zb_bw_cntl |= R300_WR_COMP_ENABLE;
+ }
+
+ if (r300->screen->caps.z_compress == R300_ZCOMP_8X8) {
+ z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
+ }
}
- /* RV350 and up optimizations. */
- /* The section 10.4.9 in the docs is a lie. */
- if (r300->z_compression == RV350_Z_COMPRESS_88)
- z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
-
- if (hiz_in_use) {
- bool can_hiz = r300_can_hiz(r300);
- if (can_hiz) {
- z->zb_bw_cntl |= R300_HIZ_ENABLE;
- z->sc_hyperz |= R300_SC_HYPERZ_ENABLE;
- z->sc_hyperz |= r300_get_sc_hz_max(r300);
- z->zb_bw_cntl |= r300_get_hiz_min(r300);
+
+ if (hiz_in_use && r300_can_hiz(r300)) {
+ z->zb_bw_cntl |= R300_HIZ_ENABLE |
+ r300_get_hiz_min(r300);
+
+ z->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
+ r300_get_sc_hz_max(r300);
+
+ if (r300->screen->caps.is_r500) {
+ z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 |
+ R500_HIZ_EQUAL_REJECT_ENABLE;
}
}
/* R500-specific features and optimizations. */
if (r300->screen->caps.is_r500) {
- z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3;
- z->zb_bw_cntl |=
- R500_HIZ_EQUAL_REJECT_ENABLE |
- R500_PEQ_PACKING_ENABLE |
- R500_COVERED_PTR_MASKING_ENABLE;
+ z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE |
+ R500_COVERED_PTR_MASKING_ENABLE;
}
}
@@ -297,26 +294,10 @@ static void r300_update_hiz_clear(struct r300_context *r300)
r300->hiz_clear.size = height * 4;
}
-static void r300_update_zmask_clear(struct r300_context *r300)
-{
- struct pipe_framebuffer_state *fb =
- (struct pipe_framebuffer_state*)r300->fb_state.state;
- uint32_t height;
- int mult;
-
- if (r300->z_compression == RV350_Z_COMPRESS_88)
- mult = 8;
- else
- mult = 4;
-
- height = ALIGN_DIVUP(fb->zsbuf->height, mult);
-
- r300->zmask_clear.size = height * 4;
-}
-
void r300_update_hyperz_state(struct r300_context* r300)
{
r300_update_ztop(r300);
+
if (r300->hyperz_state.dirty) {
r300_update_hyperz(r300);
}
@@ -324,9 +305,6 @@ void r300_update_hyperz_state(struct r300_context* r300)
if (r300->hiz_clear.dirty) {
r300_update_hiz_clear(r300);
}
- if (r300->zmask_clear.dirty) {
- r300_update_zmask_clear(r300);
- }
}
void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
@@ -345,43 +323,6 @@ void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
ndw = ALIGN_DIVUP(zsize, 64);
tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0);
- return;
-}
-
-void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress)
-{
- int bsize = 256;
- uint32_t zsize, ndw;
- int level = surf->base.u.tex.level;
- struct r300_texture *tex;
-
- tex = r300_texture(surf->base.texture);
-
- /* We currently don't handle decompression for 3D textures and cubemaps
- * correctly. */
- if (tex->desc.b.b.target != PIPE_TEXTURE_1D &&
- tex->desc.b.b.target != PIPE_TEXTURE_2D &&
- tex->desc.b.b.target != PIPE_TEXTURE_RECT)
- return;
-
- /* Cannot flush zmask of 16-bit zbuffers. */
- if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16)
- return;
-
- if (tex->zmask_mem[level])
- return;
-
- zsize = tex->desc.layer_size_in_bytes[level];
- zsize /= util_format_get_blocksize(tex->desc.b.b.format);
-
- /* each zmask dword represents 16 4x4 blocks - which is 256 pixels
- or 16 8x8 depending on the gb peq flag = 1024 pixels */
- if (compress == RV350_Z_COMPRESS_88)
- bsize = 1024;
-
- ndw = ALIGN_DIVUP(zsize, bsize);
- tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0);
- return;
}
boolean r300_hyperz_init_mm(struct r300_context *r300)
@@ -389,15 +330,9 @@ boolean r300_hyperz_init_mm(struct r300_context *r300)
struct r300_screen* r300screen = r300->screen;
int frag_pipes = r300screen->caps.num_frag_pipes;
- r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
- if (!r300->zmask_mm)
- return FALSE;
-
if (r300screen->caps.hiz_ram) {
r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
if (!r300->hiz_mm) {
- u_mmDestroy(r300->zmask_mm);
- r300->zmask_mm = NULL;
return FALSE;
}
}
@@ -413,7 +348,4 @@ void r300_hyperz_destroy_mm(struct r300_context *r300)
u_mmDestroy(r300->hiz_mm);
r300->hiz_mm = NULL;
}
-
- u_mmDestroy(r300->zmask_mm);
- r300->zmask_mm = NULL;
}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
index 30a23ec649..d4c8e7c60a 100644
--- a/src/gallium/drivers/r300/r300_hyperz.h
+++ b/src/gallium/drivers/r300/r300_hyperz.h
@@ -28,8 +28,8 @@ struct r300_context;
void r300_update_hyperz_state(struct r300_context* r300);
void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
-void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress);
boolean r300_hyperz_init_mm(struct r300_context *r300);
void r300_hyperz_destroy_mm(struct r300_context *r300);
+
#endif
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 2664c1dc83..ba456d413f 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -694,8 +694,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
}
if (change == R300_CHANGED_FB_STATE ||
- change == R300_CHANGED_CBZB_FLAG ||
- change == R300_CHANGED_ZCLEAR_FLAG) {
+ change == R300_CHANGED_HYPERZ_FLAG) {
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
@@ -719,8 +718,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
}
static void
- r300_set_framebuffer_state(struct pipe_context* pipe,
- const struct pipe_framebuffer_state* state)
+r300_set_framebuffer_state(struct pipe_context* pipe,
+ const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
@@ -728,7 +727,6 @@ static void
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
unsigned max_width, max_height, i;
uint32_t zbuffer_bpp = 0;
- int blocksize;
if (r300->screen->caps.is_r500) {
max_width = max_height = 4096;
@@ -744,6 +742,32 @@ static void
return;
}
+ if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) {
+ /* A zmask is in use: either decompress the old zbuffer or lock it. */
+ if (state->zsbuf) {
+ if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
+ /* Decompress the currently bound zbuffer before we bind another one. */
+ r300_decompress_zmask(r300);
+ }
+ } else {
+ /* We don't bind another zbuffer, so lock the current one. */
+ r300->zmask_locked = TRUE;
+ pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
+ }
+ } else if (r300->zmask_locked && r300->locked_zbuffer) {
+ /* A zbuffer is currently locked: either decompress it or unlock it. */
+ if (state->zsbuf) {
+ if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
+ /* We are binding some other zbuffer, so decompress the locked one;
+ * it gets unlocked automatically. */
+ r300_decompress_zmask_locked_unsafe(r300);
+ } else {
+ /* We are binding the locked zbuffer again, so unlock it. */
+ r300->zmask_locked = FALSE;
+ }
+ }
+ }
+
/* If nr_cbufs is changed from zero to non-zero or vice versa... */
if (!!old_state->nr_cbufs != !!state->nr_cbufs) {
r300_mark_atom_dirty(r300, &r300->blend_state);
@@ -758,14 +782,15 @@ static void
util_copy_framebuffer_state(r300->fb_state.state, state);
+ if (!r300->zmask_locked) {
+ pipe_surface_reference(&r300->locked_zbuffer, NULL);
+ }
+
r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
r300->validate_buffers = TRUE;
- r300->z_compression = false;
-
if (state->zsbuf) {
- blocksize = util_format_get_blocksize(state->zsbuf->texture->format);
- switch (blocksize) {
+ switch (util_format_get_blocksize(state->zsbuf->texture->format)) {
case 2:
zbuffer_bpp = 16;
break;
@@ -773,30 +798,19 @@ static void
zbuffer_bpp = 24;
break;
}
+
+ /* Setup Hyper-Z. */
if (can_hyperz) {
struct r300_surface *zs_surf = r300_surface(state->zsbuf);
- struct r300_texture *tex;
- int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44;
+ struct r300_texture *tex = r300_texture(zs_surf->base.texture);
int level = zs_surf->base.u.tex.level;
- tex = r300_texture(zs_surf->base.texture);
-
/* work out whether we can support hiz on this buffer */
r300_hiz_alloc_block(r300, zs_surf);
-
- /* work out whether we can support zmask features on this buffer */
- r300_zmask_alloc_block(r300, zs_surf, compress);
-
- if (tex->zmask_mem[level]) {
- /* compression causes hangs on 16-bit */
- if (zbuffer_bpp == 24)
- r300->z_compression = compress;
- }
+
DBG(r300, DBG_HYPERZ,
- "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
- tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef,
- r300->z_compression, tex->zmask_mem[level] ? 1 : 0,
- tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef);
+ "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
+ tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef);
}
/* Polygon offset depends on the zbuffer bit depth. */
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 95be7849f8..de4c271328 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -862,11 +862,35 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
}
-void r300_update_derived_state(struct r300_context* r300)
+static void r300_decompress_depth_textures(struct r300_context *r300)
{
- r300_flush_depth_textures(r300);
+ struct r300_textures_state *state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ struct pipe_resource *tex;
+ unsigned count = MIN2(state->sampler_view_count,
+ state->sampler_state_count);
+ unsigned i;
+
+ if (!r300->zmask_locked || !r300->locked_zbuffer) {
+ return;
+ }
+
+ for (i = 0; i < count; i++) {
+ if (state->sampler_views[i] && state->sampler_states[i]) {
+ tex = state->sampler_views[i]->base.texture;
+ if (tex == r300->locked_zbuffer->texture) {
+ r300_decompress_zmask_locked(r300);
+ return;
+ }
+ }
+ }
+}
+
+void r300_update_derived_state(struct r300_context* r300)
+{
if (r300->textures_state.dirty) {
+ r300_decompress_depth_textures(r300);
r300_merge_textures_and_samplers(r300);
}
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index a5fbe855e7..ca2762809d 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -743,8 +743,6 @@ static void r300_texture_destroy(struct pipe_screen *screen,
for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) {
if (tex->hiz_mem[i])
u_mmFreeMem(tex->hiz_mem[i]);
- if (tex->zmask_mem[i])
- u_mmFreeMem(tex->zmask_mem[i]);
}
FREE(tex);