diff options
author | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-08-18 14:37:47 +0200 |
---|---|---|
committer | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2010-08-18 14:37:47 +0200 |
commit | 3e54d63429fe7ca5db3c75c181abbaf7a7f55724 (patch) | |
tree | e129c36aaef712525f0a04fc5b06c445e3cf84df /src/gallium/drivers/r300 | |
parent | eaab76457818fad0926b84c663440e8987e1f19f (diff) | |
parent | 85d9bc236d6a8ff8f12cbc2150f8c3740354f573 (diff) |
Merge remote branch 'origin/master' into nv50-compiler
Diffstat (limited to 'src/gallium/drivers/r300')
32 files changed, 1819 insertions, 812 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 13152635a6..728bc40a5b 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -24,6 +24,7 @@ C_SOURCES = \ r300_vs.c \ r300_vs_draw.c \ r300_texture.c \ + r300_texture_desc.c \ r300_tgsi_to_rc.c \ r300_transfer.c diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 552ed4e5be..bf023daaa5 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -34,6 +34,7 @@ r300 = env.ConvenienceLibrary( 'r300_vs.c', 'r300_vs_draw.c', 'r300_texture.c', + 'r300_texture_desc.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', ] + r300compiler) + r300compiler diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 895efaa1c4..47ffc0cb3c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -21,7 +21,10 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_context.h" +#include "r300_emit.h" +#include "r300_hyperz.h" #include "r300_texture.h" +#include "r300_winsys.h" #include "util/u_format.h" #include "util/u_pack_color.h" @@ -81,7 +84,7 @@ static void r300_blitter_end(struct r300_context *r300) } static uint32_t r300_depth_clear_cb_value(enum pipe_format format, - const float* rgba) + const float* rgba) { union util_color uc; util_pack_color(rgba, format, &uc); @@ -97,29 +100,29 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_surface *surf = r300_surface(fb->cbufs[0]); - unsigned bpp; /* Only color clear allowed, and only one colorbuffer. */ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) return FALSE; - /* The colorbuffer must be point-sampled. */ - if (surf->base.texture->nr_samples > 1) - return FALSE; - - bpp = util_format_get_blocksizebits(surf->base.format); + return r300_surface(fb->cbufs[0])->cbzb_allowed; +} - /* ZB can only work with the two pixel sizes. */ - if (bpp != 16 && bpp != 32) - return FALSE; +static uint32_t r300_depth_clear_value(enum pipe_format format, + double depth, unsigned stencil) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return util_pack_z(format, depth); - /* If the midpoint ZB offset is not aligned to 2048, it returns garbage - * with certain texture sizes. Macrotiling ensures the alignment. */ - if (!r300_texture(surf->base.texture)->mip_macrotile[surf->base.level]) - return FALSE; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return util_pack_z_stencil(format, depth, stencil); - return TRUE; + default: + assert(0); + return 0; + } } /* Clear currently bound buffers. */ @@ -169,8 +172,27 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_texture *zstex = + fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; + + /* Enable fast Z clear. + * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ + if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && has_hyperz) { + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + + r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); + if (zstex->zmask_mem[fb->zsbuf->level]) { + r300->zmask_clear.dirty = TRUE; + buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; + } + if (zstex->hiz_mem[fb->zsbuf->level]) + r300->hiz_clear.dirty = TRUE; + } /* Enable CBZB clear. */ if (r300_cbzb_clear_allowed(r300, buffers)) { @@ -187,20 +209,61 @@ static void r300_clear(struct pipe_context* pipe, } /* Clear. */ - r300_blitter_begin(r300, R300_CLEAR); - util_blitter_clear(r300->blitter, - width, - height, - fb->nr_cbufs, - buffers, rgba, depth, stencil); - r300_blitter_end(r300); + if (buffers) { + /* Clear using the blitter. */ + r300_blitter_begin(r300, R300_CLEAR); + util_blitter_clear(r300->blitter, + width, + height, + fb->nr_cbufs, + buffers, rgba, depth, stencil); + r300_blitter_end(r300); + } else if (r300->zmask_clear.dirty) { + /* Just clear zmask and hiz now, this does not use a standard draw + * procedure. */ + unsigned dwords; + + /* Calculate zmask_clear and hiz_clear atom sizes. */ + r300_update_hyperz_state(r300); + dwords = r300->zmask_clear.size + + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + + r300_get_num_cs_end_dwords(r300); + + /* Reserve CS space. */ + if (dwords > (r300->cs->ndw - r300->cs->cdw)) { + r300->context.flush(&r300->context, 0, NULL); + } + + /* Emit clear packets. */ + r300_emit_zmask_clear(r300, r300->zmask_clear.size, + r300->zmask_clear.state); + r300->zmask_clear.dirty = FALSE; + if (r300->hiz_clear.dirty) { + r300_emit_hiz_clear(r300, r300->hiz_clear.size, + r300->hiz_clear.state); + r300->hiz_clear.dirty = FALSE; + } + } else { + assert(0); + } /* Disable CBZB clear. */ if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; + hyperz->zb_depthclearvalue = hyperz_dcv; r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); } + /* Enable fastfill and/or hiz. + * + * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update + * looks if zmask/hiz is in use and enables fastfill accordingly. */ + if (zstex && + (zstex->zmask_in_use[fb->zsbuf->level] || + zstex->hiz_in_use[fb->zsbuf->level])) { + r300->hyperz_state.dirty = TRUE; + } + /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */ if (r300->flush_counter == 0) pipe->flush(pipe, 0, NULL); @@ -238,6 +301,33 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, r300_blitter_end(r300); } +/* Flush a depth stencil buffer. */ +void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned zslice) +{ + struct r300_context *r300 = r300_context(pipe); + struct pipe_surface *dstsurf; + struct r300_texture *tex = r300_texture(dst); + + if (!tex->zmask_mem[subdst.level]) + return; + if (!tex->zmask_in_use[subdst.level]) + return; + + dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, + subdst.face, subdst.level, zslice, + PIPE_BIND_DEPTH_STENCIL); + r300->z_decomp_rd = TRUE; + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_end(r300); + r300->z_decomp_rd = FALSE; + + tex->zmask_in_use[subdst.level] = FALSE; +} + /* Copy a block of pixels from one surface to another using HW. */ static void r300_hw_copy_region(struct pipe_context* pipe, struct pipe_resource *dst, @@ -269,7 +359,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, { enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; - + boolean is_depth; if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, @@ -296,6 +386,10 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) { + r300_flush_depth_stencil(pipe, src, subsrc, srcz); + } if (old_format != new_format) { dst->format = new_format; src->format = new_format; diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 21f3b9d261..48c2409211 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -36,7 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 2; caps->num_tex_units = 16; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; - caps->has_hiz = TRUE; + caps->hiz_ram = 0; caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -49,6 +49,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4145: @@ -61,6 +63,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4150: @@ -77,8 +81,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4E54: case 0x4E56: caps->family = CHIP_FAMILY_RV350; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4148: @@ -91,12 +95,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R350; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4E4A: caps->family = CHIP_FAMILY_R360; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5460: @@ -108,8 +116,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5B64: case 0x5B65: caps->family = CHIP_FAMILY_RV370; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x3150: @@ -120,6 +128,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x3E54: caps->family = CHIP_FAMILY_RV380; caps->high_second_pipe = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4A48: @@ -135,6 +145,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5548: @@ -149,6 +161,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x554C: @@ -161,6 +175,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5D4C: @@ -172,6 +188,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4B48: @@ -182,6 +200,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5E4C: @@ -199,34 +219,36 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5954: case 0x5955: caps->family = CHIP_FAMILY_RS480; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x791E: @@ -234,6 +256,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x793F: @@ -242,6 +266,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x796C: @@ -251,6 +277,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7100: @@ -270,6 +298,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R520; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7140: @@ -313,6 +343,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV515; caps->num_vert_fpus = 2; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x71C0: @@ -334,6 +366,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; + /*caps->hiz_ram = RV530_HIZ_LIMIT;*/ + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7240: @@ -354,12 +388,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R580; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7280: caps->family = CHIP_FAMILY_RV570; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7281: @@ -376,6 +414,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV560; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; default: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 65750f54e7..e7ca642b4f 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -25,6 +25,14 @@ #include "pipe/p_compiler.h" +/* these are sizes in dwords */ +#define R300_HIZ_LIMIT 10240 +#define RV530_HIZ_LIMIT 15360 + +/* rv3xx have only one pipe */ +#define PIPE_ZMASK_SIZE 4096 +#define RV3xx_ZMASK_SIZE 5120 + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -42,8 +50,10 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM. */ - boolean has_hiz; + /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + int hiz_ram; + /* some chipsets have zmask ram per pipe some don't */ + int zmask_ram; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index df90359058..e8b6c4f7af 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" +#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" @@ -114,6 +115,10 @@ static void r300_destroy_context(struct pipe_context* context) u_upload_destroy(r300->upload_vb); u_upload_destroy(r300->upload_ib); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_destroy_mm(r300); + translate_cache_destroy(r300->tran.translate_cache); r300_release_referenced_objects(r300); @@ -166,6 +171,9 @@ static void r300_setup_atoms(struct r300_context* r300) boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; /* Create the actual atom list. * @@ -188,8 +196,8 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); + R300_INIT_ATOM(hyperz_state, is_r500 || (is_rv350 && drm_2_6_0) ? 10 : 8); /* ZB (unpipelined), SC. */ - R300_INIT_ATOM(hyperz_state, 6); R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); @@ -220,6 +228,13 @@ static void r300_setup_atoms(struct r300_context* r300) /* TX. */ R300_INIT_ATOM(texture_cache_inval, 2); R300_INIT_ATOM(textures_state, 0); + if (has_hyperz) { + /* HiZ Clear */ + if (has_hiz_ram) + R300_INIT_ATOM(hiz_clear, 0); + /* zmask clear */ + R300_INIT_ATOM(zmask_clear, 0); + } /* ZB (unpipelined), SU. */ R300_INIT_ATOM(query_start, 4); @@ -282,8 +297,7 @@ static void r300_init_states(struct pipe_context *pipe) (struct r300_vap_invariant_state*)r300->vap_invariant_state.state; struct r300_invariant_state *invariant = (struct r300_invariant_state*)r300->invariant_state.state; - struct r300_hyperz_state *hyperz = - (struct r300_hyperz_state*)r300->hyperz_state.state; + CB_LOCALS; pipe->set_blend_color(pipe, &bc); @@ -351,10 +365,20 @@ static void r300_init_states(struct pipe_context *pipe) /* Initialize the hyperz state. */ { - BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size); + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + BEGIN_CB(&hyperz->cb_flush_begin, r300->hyperz_state.size); + OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); OUT_CB_REG(R300_ZB_BW_CNTL, 0); OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); + + if (r300->screen->caps.is_r500 || + (r300->screen->caps.is_rv350 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0))) { + OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); + } END_CB; } } @@ -415,6 +439,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, rws->cs_set_flush(r300->cs, r300_flush_cb, r300); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_init_mm(r300); + r300->upload_ib = u_upload_create(&r300->context, 32 * 1024, 16, PIPE_BIND_INDEX_BUFFER); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b9c96d5bdd..6fa7f470f9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -106,13 +106,19 @@ struct r300_dsa_state { }; struct r300_hyperz_state { + int current_func; /* -1 after a clear before first op */ + int flush; /* This is actually a command buffer with named dwords. */ + uint32_t cb_flush_begin; + uint32_t zb_zcache_ctlstat; /* R300_ZB_CACHE_CNTL */ uint32_t cb_begin; uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */ uint32_t cb_reg1; uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */ uint32_t cb_reg2; uint32_t sc_hyperz; /* R300_SC_HYPERZ */ + uint32_t cb_reg3; + uint32_t gb_z_peq_config; /* R300_GB_Z_PEQ_CONFIG: 0x4028 */ }; struct r300_gpu_flush { @@ -318,29 +324,39 @@ struct r300_surface { uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */ uint32_t cbzb_pitch; /* DEPTHPITCH. */ uint32_t cbzb_format; /* ZB_FORMAT. */ + + /* Whether the CBZB clear is allowed on the surface. */ + boolean cbzb_allowed; + }; -struct r300_texture { - /* Parent class */ +struct r300_texture_desc { + /* Parent class. */ struct u_resource b; - enum r300_buffer_domain domain; + /* Buffer tiling. + * Macrotiling is specified per-level because small mipmaps cannot + * be macrotiled. */ + enum r300_buffer_tiling microtile; + enum r300_buffer_tiling macrotile[R300_MAX_TEXTURE_LEVELS]; /* Offsets into the buffer. */ - unsigned offset[R300_MAX_TEXTURE_LEVELS]; + unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch for each mip-level */ - unsigned pitch[R300_MAX_TEXTURE_LEVELS]; + /* Strides for each mip-level. */ + unsigned stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned stride_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch multiplied by blockwidth as hardware wants - * the number of pixels instead of the number of blocks. */ - unsigned hwpitch[R300_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face or 2D image based on the texture target. */ + unsigned layer_size_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* Size of one zslice or face based on the texture target */ - unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; + /* Total size of this texture, in bytes, + * derived from the texture properties. */ + unsigned size_in_bytes; - /* Whether the mipmap level is macrotiled. */ - enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS]; + /* Total size of the buffer backing this texture, in bytes. + * It must be >= size. */ + unsigned buffer_size_in_bytes; /** * If non-zero, override the natural texture layout with @@ -350,16 +366,24 @@ struct r300_texture { * * \sa r300_texture_get_stride */ - unsigned stride_override; + unsigned stride_in_bytes_override; - /* Total size of this texture, in bytes. */ - unsigned size; + /* Whether this texture has non-power-of-two dimensions. + * It can be either a regular texture or a rectangle one. */ + boolean is_npot; - /* Whether this texture has non-power-of-two dimensions - * or a user-specified pitch. - * It can be either a regular texture or a rectangle one. - */ - boolean uses_pitch; + /* This flag says that hardware must use the stride for addressing + * instead of the width. */ + boolean uses_stride_addressing; + + /* Whether CBZB fast color clear is allowed on the miplevel. */ + boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; +}; + +struct r300_texture { + struct r300_texture_desc desc; + + enum r300_buffer_domain domain; /* Pipe buffer backing this texture. */ struct r300_winsys_buffer *buffer; @@ -370,8 +394,11 @@ struct r300_texture { /* All bits should be filled in. */ struct r300_texture_fb_state fb_state; - /* Buffer tiling */ - enum r300_buffer_tiling microtile, macrotile; + /* hyper-z memory allocs */ + struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; + struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; + boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; + boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ @@ -498,6 +525,10 @@ struct r300_context { struct r300_atom texture_cache_inval; /* GPU flush. */ struct r300_atom gpu_flush; + /* HiZ clear */ + struct r300_atom hiz_clear; + /* zmask clear */ + struct r300_atom zmask_clear; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -510,6 +541,8 @@ struct r300_context { struct r300_vertex_element_state *velems; bool any_user_vbs; + struct pipe_index_buffer index_buffer; + /* Vertex info for Draw. */ struct vertex_info vertex_info; @@ -533,8 +566,16 @@ struct r300_context { boolean two_sided_color; /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; - +#define R300_Z_COMPRESS_44 1 +#define RV350_Z_COMPRESS_88 2 + int z_compression; boolean cbzb_clear; + boolean z_decomp_rd; + + /* two mem block managers for hiz/zmask ram space */ + struct mem_block *hiz_mm; + struct mem_block *zmask_mm; + /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; @@ -586,6 +627,12 @@ void r300_init_render_functions(struct r300_context *r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); +/* r300_blit.c */ +void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned zslice); + /* r300_query.c */ void r300_resume_query(struct r300_context *r300, struct r300_query *query); @@ -605,7 +652,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG + R300_CHANGED_CBZB_FLAG, + R300_CHANGED_ZCLEAR_FLAG }; void r300_mark_fb_state_dirty(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 3beb625d43..c194d6a1b0 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -136,8 +136,8 @@ #define WRITE_CS_TABLE(values, count) do { \ CS_DEBUG(assert(cs_count == 0);) \ - memcpy(cs_copy->ptr + cs_copy->cdw, values, count * 4); \ - cs_copy->cdw += count; \ + memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \ + cs_copy->cdw += (count); \ } while (0) #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 31d4e14681..c3e157e99a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -38,11 +38,13 @@ static const struct debug_named_value debug_options[] = { { "fall", DBG_FALL, "Fallbacks (for debugging)" }, { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "fb", DBG_FB, "Framebuffer (for debugging)" }, + { "cbzb", DBG_CBZB, "Fast color clear info (for debugging)" }, { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, { "stats", DBG_STATS, "Gather statistics" }, + { "hyperz", DBG_HYPERZ, "HyperZ (for debugging)" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index d510d80a7b..896aeef395 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -36,7 +36,10 @@ enum r300_buffer_tiling { R300_BUFFER_LINEAR = 0, R300_BUFFER_TILED, - R300_BUFFER_SQUARETILED + R300_BUFFER_SQUARETILED, + + R300_BUFFER_UNKNOWN, + R300_BUFFER_SELECT_LAYOUT = R300_BUFFER_UNKNOWN }; enum r300_buffer_domain { /* bitfield */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 36a26a7871..d0fd45349e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_simple_list.h" #include "r300_context.h" @@ -329,6 +330,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); CS_LOCALS(r300); BEGIN_CS(size); @@ -364,6 +366,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + + DBG(r300, DBG_CBZB, + "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, + surf->cbzb_pitch); } /* Set up a zbuffer. */ else if (fb->zsbuf) { @@ -377,15 +383,32 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); - /* HiZ RAM. */ - if (r300->screen->caps.has_hiz) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + if (has_hyperz) { + uint32_t surf_pitch; + struct r300_texture *tex; + int level = surf->base.level; + tex = r300_texture(surf->base.texture); + + surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ + if (r300->screen->caps.hiz_ram) { + if (tex->hiz_mem[level]) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + } + /* Z Mask RAM. (compressed zbuffer) */ + if (tex->zmask_mem[level]) { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); + } } - - /* Z Mask RAM. (compressed zbuffer) */ - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); } END_CS; @@ -394,8 +417,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_hyperz_state(struct r300_context *r300, unsigned size, void *state) { + struct r300_hyperz_state *z = state; CS_LOCALS(r300); - WRITE_CS_TABLE(state, size); + if (z->flush) + WRITE_CS_TABLE(&z->cb_flush_begin, size); + else + WRITE_CS_TABLE(&z->cb_begin, size - 2); } void r300_emit_hyperz_end(struct r300_context *r300) @@ -403,9 +430,11 @@ void r300_emit_hyperz_end(struct r300_context *r300) struct r300_hyperz_state z = *(struct r300_hyperz_state*)r300->hyperz_state.state; + z.flush = 1; z.zb_bw_cntl = 0; z.zb_depthclearvalue = 0; z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z.gb_z_peq_config = 0; r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); } @@ -907,6 +936,22 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_TABLE(data, 4); } } + + /* Emit flow control instructions. */ + if (code->num_fc_ops) { + + OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); + if (r300screen->caps.is_r500) { + OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2); + OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2); + } else { + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops); + } + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops); + } + END_CS; } @@ -943,6 +988,111 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0, height, offset_shift; + struct r300_texture* tex; + int i; + + tex = r300_texture(fb->zsbuf->texture); + + offset = tex->hiz_mem[fb->zsbuf->level]->ofs; + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + /* convert from pixels to 4x4 blocks */ + stride = ALIGN_DIVUP(stride, 4); + + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* there are 4 blocks per dwords */ + stride = ALIGN_DIVUP(stride, 4); + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + + offset_shift = 2; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); + } + z->current_func = -1; + + /* Mark the current zbuffer's hiz ram as in use. */ + tex->hiz_in_use[fb->zsbuf->level] = TRUE; +} + +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0; + struct r300_texture* tex; + uint32_t i, height; + int mult, offset_shift; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + offset = tex->zmask_mem[fb->zsbuf->level]->ofs; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + offset_shift = 4; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + + /* okay have width in pixels - divide by block width */ + stride = ALIGN_DIVUP(stride, mult); + /* have width in blocks - divide by number of fragment pipes screen width */ + /* 16 blocks per dword */ + stride = ALIGN_DIVUP(stride, 16); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); + } + + /* Mark the current zbuffer's zmask as in use. */ + tex->zmask_in_use[fb->zsbuf->level] = TRUE; +} + void r300_emit_ztop_state(struct r300_context* r300, unsigned size, void* state) { @@ -1062,6 +1212,17 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) return dwords; } +unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) +{ + unsigned dwords = 0; + + /* Emitted in flush. */ + dwords += 26; /* emit_query_end */ + dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ + + return dwords; +} + /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 5d05039669..bae2525634 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -112,7 +112,11 @@ void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, voi void r300_emit_invariant_state(struct r300_context *r300, unsigned size, void *state); +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state); +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state); + unsigned r300_get_num_dirty_dwords(struct r300_context *r300); +unsigned r300_get_num_cs_end_dwords(struct r300_context *r300); /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index ae7b5759e7..fe182b6615 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -43,14 +43,6 @@ static void r300_flush(struct pipe_context* pipe, u_upload_flush(r300->upload_vb); u_upload_flush(r300->upload_ib); - /* We probably need to flush Draw, but we may have been called from - * within Draw. This feels kludgy, but it might be the best thing. - * - * Of course, the best thing is to kill Draw with fire. :3 */ - if (r300->draw && !r300->draw->flushing) { - draw_flush(r300->draw); - } - if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index b145ded639..2a0c30620a 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -72,6 +72,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, fs_inputs->wpos = i; break; + case TGSI_SEMANTIC_FACE: + assert(index == 0); + fs_inputs->face = i; + break; + default: fprintf(stderr, "r300: FP: Unknown input semantic: %i\n", info->input_semantic_name[i]); @@ -120,6 +125,9 @@ static void allocate_hardware_inputs( allocate(mydata, inputs->color[i], reg++); } } + if (inputs->face != ATTR_UNUSED) { + allocate(mydata, inputs->face, reg++); + } for (i = 0; i < ATTR_GENERIC_COUNT; i++) { if (inputs->generic[i] != ATTR_UNUSED) { allocate(mydata, inputs->generic[i], reg++); @@ -173,7 +181,7 @@ static void get_external_state( t = (struct r300_texture*)texstate->sampler_views[i]->base.texture; /* XXX this should probably take into account STR, not just S. */ - if (t->uses_pitch) { + if (t->desc.is_npot) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; @@ -248,13 +256,18 @@ static void r300_emit_fs_code_to_buffer( shader->cb_code_size = 19 + ((code->inst_end + 1) * 6) + - imm_count * 7; + imm_count * 7 + + code->int_constant_count * 2; NEW_CB(shader->cb_code, shader->cb_code_size); OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl); - OUT_CB_REG(R500_US_CODE_RANGE, + for(i = 0; i < code->int_constant_count; i++){ + OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4), + code->int_constants[i]); + } + OUT_CB_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); OUT_CB_REG(R500_US_CODE_OFFSET, 0); OUT_CB_REG(R500_US_CODE_ADDR, @@ -355,13 +368,14 @@ static void r300_translate_fragment_shader( { struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; - int wpos; + int wpos, face; unsigned i; tgsi_scan_shader(tokens, &shader->info); r300_shader_read_fs_inputs(&shader->info, &shader->inputs); wpos = shader->inputs.wpos; + face = shader->inputs.face; /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); @@ -378,7 +392,7 @@ static void r300_translate_fragment_shader( find_output_registers(&compiler, shader); if (compiler.Base.Debug) { - debug_printf("r300: Initial fragment program\n"); + DBG(r300, DBG_FP, "r300: Initial fragment program\n"); tgsi_dump(tokens, 0); } @@ -401,6 +415,10 @@ static void r300_translate_fragment_shader( rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE); } + if (face != ATTR_UNUSED) { + rc_transform_fragment_face(&compiler.Base, face); + } + /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); @@ -413,7 +431,7 @@ static void r300_translate_fragment_shader( } if (compiler.Base.Error) { - fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader" + DBG(r300, DBG_FP, "r300 FP: Compiler Error:\n%sUsing a dummy shader" " instead.\nIf there's an 'unknown opcode' message, please" " file a bug report and attach this log.\n", compiler.Base.ErrorMsg); diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e952895601..811b5646e1 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -25,21 +25,170 @@ #include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" +#include "r300_winsys.h" +#include "util/u_format.h" +#include "util/u_mm.h" + +/* + HiZ rules - taken from various docs + 1. HiZ only works on depth values + 2. Cannot HiZ if stencil fail or zfail is !KEEP + 3. on R300/400, HiZ is disabled if depth test is EQUAL + 4. comparison changes without clears usually mean disabling HiZ +*/ /*****************************************************************************/ /* The HyperZ setup */ /*****************************************************************************/ +static bool r300_get_sc_hz_max(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_SC_HYPERZ_MIN; + + if (func >= 4 && func <= 7) + ret = R300_SC_HYPERZ_MAX; + return ret; +} + +static bool r300_zfunc_same_direction(int func1, int func2) +{ + /* func1 is less/lessthan */ + if (func1 == 1 || func1 == 2) + if (func2 == 3 || func2 == 4 || func2 == 5) + return FALSE; + + if (func2 == 1 || func2 == 2) + if (func1 == 4 || func1 == 5) + return FALSE; + return TRUE; +} + +static int r300_get_hiz_min(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_HIZ_MIN; + + if (func == 1 || func == 2) + ret = R300_HIZ_MAX; + return ret; +} + +static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) +{ + if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP)) + return TRUE; + return FALSE; +} + +static boolean r300_can_hiz(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; + struct r300_screen* r300screen = r300->screen; + struct r300_hyperz_state *z = r300->hyperz_state.state; + + /* shader writes depth - no HiZ */ + if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ + return FALSE; + + if (r300->query_current) + return FALSE; + /* if stencil fail/zfail op is not KEEP */ + if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || + r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + return FALSE; + + if (dsa->depth.enabled) { + /* if depth func is EQUAL pre-r500 */ + if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + return FALSE; + /* if depth func is NOTEQUAL */ + if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + return FALSE; + } + /* depth comparison function - if just cleared save and return okay */ + if (z->current_func == -1) { + int func = dsa_state->z_stencil_control & 0x7; + if (func != 0 && func != 7) + z->current_func = dsa_state->z_stencil_control & 0x7; + } else { + /* simple don't change */ + if (!r300_zfunc_same_direction(z->current_func, (dsa_state->z_stencil_control & 0x7))) { + DBG(r300, DBG_HYPERZ, "z func changed direction - disabling hyper-z %d -> %d\n", z->current_func, dsa_state->z_stencil_control); + return FALSE; + } + } + return TRUE; +} + static void r300_update_hyperz(struct r300_context* r300) { struct r300_hyperz_state *z = (struct r300_hyperz_state*)r300->hyperz_state.state; + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_texture *zstex = + fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; + boolean zmask_in_use = FALSE; + boolean hiz_in_use = FALSE; + z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z->flush = 0; - if (r300->cbzb_clear) + if (r300->cbzb_clear) { z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; + return; + } + + if (!zstex) + return; + + if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + return; + + zmask_in_use = zstex->zmask_in_use[fb->zsbuf->level]; + hiz_in_use = zstex->hiz_in_use[fb->zsbuf->level]; + + /* Z fastfill. */ + if (zmask_in_use) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ + } + + /* Zbuffer compression. */ + if (zmask_in_use && r300->z_compression) { + z->zb_bw_cntl |= R300_RD_COMP_ENABLE; + if (r300->z_decomp_rd == false) + z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + } + /* RV350 and up optimizations. */ + /* The section 10.4.9 in the docs is a lie. */ + if (r300->z_compression == RV350_Z_COMPRESS_88) + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + + if (hiz_in_use) { + bool can_hiz = r300_can_hiz(r300); + if (can_hiz) { + z->zb_bw_cntl |= R300_HIZ_ENABLE; + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; + z->sc_hyperz |= r300_get_sc_hz_max(r300); + z->zb_bw_cntl |= r300_get_hiz_min(r300); + } + } + + /* R500-specific features and optimizations. */ + if (r300->screen->caps.is_r500) { + z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; + z->zb_bw_cntl |= + R500_HIZ_EQUAL_REJECT_ENABLE | + R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; + } } /*****************************************************************************/ @@ -126,15 +275,121 @@ static void r300_update_ztop(struct r300_context* r300) } else { ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } - if (ztop_state->z_buffer_top != old_ztop) r300->ztop_state.dirty = TRUE; } +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +static void r300_update_hiz_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + r300->hiz_clear.size = height * 4; +} + +static void r300_update_zmask_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + int mult; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + r300->zmask_clear.size = height * 4; +} + void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } + + if (r300->hiz_clear.dirty) { + r300_update_hiz_clear(r300); + } + if (r300->zmask_clear.dirty) { + r300_update_zmask_clear(r300); + } +} + +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) +{ + struct r300_texture *tex; + uint32_t zsize, ndw; + int level = surf->base.level; + + tex = r300_texture(surf->base.texture); + + if (tex->hiz_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + ndw = ALIGN_DIVUP(zsize, 64); + + tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); + return; +} + +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress) +{ + int bsize = 256; + uint32_t zsize, ndw; + int level = surf->base.level; + struct r300_texture *tex; + + tex = r300_texture(surf->base.texture); + + /* We currently don't handle decompression for 3D textures and cubemaps + * correctly. */ + if (tex->desc.b.b.target != PIPE_TEXTURE_1D && + tex->desc.b.b.target != PIPE_TEXTURE_2D) + return; + + if (tex->zmask_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + + /* each zmask dword represents 16 4x4 blocks - which is 256 pixels + or 16 8x8 depending on the gb peq flag = 1024 pixels */ + if (compress == RV350_Z_COMPRESS_88) + bsize = 1024; + + ndw = ALIGN_DIVUP(zsize, bsize); + tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); + return; +} + +void r300_hyperz_init_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + int frag_pipes = r300screen->caps.num_frag_pipes; + + if (r300screen->caps.hiz_ram) + r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); + + r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); +} + +void r300_hyperz_destroy_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + + if (r300screen->caps.hiz_ram) + u_mmDestroy(r300->hiz_mm); + + u_mmDestroy(r300->zmask_mm); } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h index 3df5053b89..09e1ff6625 100644 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -27,4 +27,9 @@ struct r300_context; void r300_update_hyperz_state(struct r300_context* r300); +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); + +void r300_hyperz_init_mm(struct r300_context *r300); +void r300_hyperz_destroy_mm(struct r300_context *r300); #endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 2acc1a903e..60d3b600cb 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -496,6 +496,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 #define R300_VAP_GB_HORZ_DISC_ADJ 0x222c +#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230 +#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8) +#define R300_PVS_FC_LAST_INST(x) ((x) << 16) +#define R300_PVS_FC_RTN_INST(x) ((x) << 24) + /* gap */ /* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between @@ -514,6 +520,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_2288_R300 0x00750000 /* -- nh */ # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ +#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290 +#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0) +#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8) + /* gap */ /* Addresses are relative to the vertex program instruction area of the @@ -548,6 +558,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC +#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x))) /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -564,6 +577,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* write 0 to indicate end of packet? */ #define R300_VAP_VTX_END_OF_PKT 0x24AC +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500 +#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16) + +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504 +#define R500_PVS_FC_LAST_INST(x) ((x) << 0) +#define R500_PVS_FC_RTN_INST(x) ((x) << 16) + /* gap */ /* These are values from r300_reg/r300_reg.h - they are known to be correct @@ -3436,6 +3457,7 @@ enum { # define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) # define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24) +#define R300_PACKET3_3D_CLEAR_ZMASK 0x00003200 #define R300_PACKET3_INDX_BUFFER 0x00003300 # define R300_INDX_BUFFER_DST_SHIFT 0 # define R300_INDX_BUFFER_SKIP_SHIFT 16 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index bae02135da..86b11ca045 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -186,20 +186,14 @@ enum r300_prepare_flags { * \param cs_dwords The number of dwords to reserve in CS. * \param aos_offset The offset passed to emit_aos. * \param index_bias The index bias to emit. - * \param end_cs_dwords The number of free dwords which must be available - * at the end of CS after drawing in case the CS space - * management is performed by a draw_* function manually. - * The parameter may be NULL. */ static void r300_prepare_for_rendering(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, unsigned cs_dwords, int aos_offset, - int index_bias, - unsigned *end_cs_dwords) + int index_bias) { - unsigned end_dwords = 0; boolean flushed = FALSE; boolean first_draw = flags & PREP_FIRST_DRAW; boolean emit_aos = flags & PREP_EMIT_AOS; @@ -221,11 +215,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300, cs_dwords += 7; /* emit_aos_swtcl */ } - /* Emitted in flush. */ - end_dwords += 26; /* emit_query_end */ - end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */ - - cs_dwords += end_dwords; + cs_dwords += r300_get_num_cs_end_dwords(r300); /* Reserve requested CS space. */ if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) { @@ -250,9 +240,6 @@ static void r300_prepare_for_rendering(struct r300_context *r300, if (emit_aos_swtcl) r300_emit_aos_swtcl(r300, indexed); } - - if (end_cs_dwords) - *end_cs_dwords = end_dwords; } static boolean immd_is_good_idea(struct r300_context *r300, @@ -353,7 +340,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, dwords = 9 + count * vertex_size; - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, @@ -533,7 +520,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, /* 15 dwords for emit_draw_elements */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, buffer_offset, indexBias, NULL); + indexBuffer, 15, buffer_offset, indexBias); if (alt_num_verts || count <= 65535) { r300_emit_draw_elements(r300, indexBuffer, indexSize, @@ -552,7 +539,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (count) { r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, buffer_offset, indexBias, NULL); + indexBuffer, 15, buffer_offset, indexBias); } } while (count); } @@ -566,19 +553,6 @@ static void r300_draw_range_elements(struct pipe_context* pipe, } } -/* Simple helpers for context setup. Should probably be moved to util. */ -static void r300_draw_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, int indexBias, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - - pipe->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - 0, r300->vertex_buffer_max_index, - mode, start, count); -} - static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { @@ -610,7 +584,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } else { /* 9 spare dwords for emit_draw_arrays. */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0, NULL); + NULL, 9, start, 0); if (alt_num_verts || count <= 65535) { r300_emit_draw_arrays(r300, mode, count); @@ -626,7 +600,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, if (count) { r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0, NULL); + start, 0); } } while (count); } @@ -638,111 +612,104 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } } -/**************************************************************************** - * The rest of this file is for SW TCL rendering only. Please be polite and * - * keep these functions separated so that they are easier to locate. ~C. * - ***************************************************************************/ - -/* SW TCL arrays, using Draw. */ -static void r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count) +static void r300_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) { struct r300_context* r300 = r300_context(pipe); - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - int i; - if (r300->skip_rendering) { - return; + if (info->indexed && r300->index_buffer.buffer) { + unsigned offset; + + assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); + offset = r300->index_buffer.offset / r300->index_buffer.index_size; + + r300_draw_range_elements(pipe, + r300->index_buffer.buffer, + r300->index_buffer.index_size, + info->index_bias, + info->min_index, + info->max_index, + info->mode, + info->start + offset, + info->count); } - - if (!u_trim_pipe_prim(mode, &count)) { - return; - } - - r300_update_derived_state(r300); - - for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); - } - - draw_set_mapped_element_buffer(r300->draw, 0, 0, NULL); - - draw_arrays(r300->draw, mode, start, count); - - /* XXX Not sure whether this is the best fix. - * It prevents CS from being rejected and weird assertion failures. */ - draw_flush(r300->draw); - - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + else { + r300_draw_arrays(pipe, + info->mode, + info->start, + info->count); } } +/**************************************************************************** + * The rest of this file is for SW TCL rendering only. Please be polite and * + * keep these functions separated so that they are easier to locate. ~C. * + ***************************************************************************/ + /* SW TCL elements, using Draw. */ -static void r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +static void r300_swtcl_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) { struct r300_context* r300 = r300_context(pipe); struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - struct pipe_transfer *ib_transfer; + struct pipe_transfer *ib_transfer = NULL; + unsigned count = info->count; int i; - void* indices; + void* indices = NULL; if (r300->skip_rendering) { return; } - if (!u_trim_pipe_prim(mode, &count)) { + if (!u_trim_pipe_prim(info->mode, &count)) { return; } r300_update_derived_state(r300); for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); + if (r300->vertex_buffer[i].buffer) { + void *buf = pipe_buffer_map(pipe, + r300->vertex_buffer[i].buffer, + PIPE_TRANSFER_READ, + &vb_transfer[i]); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } } - indices = pipe_buffer_map(pipe, indexBuffer, - PIPE_TRANSFER_READ, &ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, indexSize, indexBias, - minIndex, maxIndex, indices); + if (info->indexed && r300->index_buffer.buffer) { + indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, + PIPE_TRANSFER_READ, &ib_transfer); + if (indices) + indices = (void *) ((char *) indices + r300->index_buffer.offset); + } - draw_arrays(r300->draw, mode, start, count); + draw_set_mapped_element_buffer_range(r300->draw, (indices) ? + r300->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + indices); + + draw_arrays(r300->draw, info->mode, info->start, count); /* XXX Not sure whether this is the best fix. * It prevents CS from being rejected and weird assertion failures. */ draw_flush(r300->draw); for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + if (r300->vertex_buffer[i].buffer) { + pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, + vb_transfer[i]); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } } - pipe_buffer_unmap(pipe, indexBuffer, - ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, 0, 0, - start, start + count - 1, - NULL); + if (ib_transfer) { + pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer); + draw_set_mapped_element_buffer_range(r300->draw, 0, 0, info->start, + info->start + count - 1, NULL); + } } /* Object for rendering using Draw. */ @@ -820,6 +787,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) PIPE_TRANSFER_WRITE, &r300render->vbo_transfer); + assert(r300render->vbo_ptr); + return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); } @@ -872,7 +841,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, (void) i; (void) ptr; r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, dwords, 0, 0, NULL); + NULL, dwords, 0, 0); DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count); @@ -925,7 +894,8 @@ static void r300_render_draw_elements(struct vbuf_render* render, * indices than it can fit in CS. */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0, &end_cs_dwords); + NULL, 256, 0, 0); + end_cs_dwords = r300_get_num_cs_end_dwords(r300); while (count) { free_dwords = r300->cs->ndw - r300->cs->cdw; @@ -955,7 +925,8 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { r300_prepare_for_rendering(r300, PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0, &end_cs_dwords); + NULL, 256, 0, 0); + end_cs_dwords = r300_get_num_cs_end_dwords(r300); } } } @@ -1049,7 +1020,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; - r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0); DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); @@ -1141,16 +1112,11 @@ static void r300_resource_resolve(struct pipe_context* pipe, void r300_init_render_functions(struct r300_context *r300) { - /* Set generic functions. */ - r300->context.draw_elements = r300_draw_elements; - /* Set draw functions based on presence of HW TCL. */ if (r300->screen->caps.has_tcl) { - r300->context.draw_arrays = r300_draw_arrays; - r300->context.draw_range_elements = r300_draw_range_elements; + r300->context.draw_vbo = r300_draw_vbo; } else { - r300->context.draw_arrays = r300_swtcl_draw_arrays; - r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + r300->context.draw_vbo = r300_swtcl_draw_vbo; } r300->context.resource_resolve = r300_resource_resolve; diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index 9a6b4e12ff..1f035d64a2 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -34,13 +34,8 @@ #include "r300_reg.h" struct r300_stencilref_context { - void (*draw_arrays)(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); - - void (*draw_range_elements)( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count); + void (*draw_vbo)(struct pipe_context *pipe, + const struct pipe_draw_info *info); uint32_t rs_cull_mode; uint32_t zb_stencilrefmask; @@ -105,41 +100,19 @@ static void r300_stencilref_end(struct r300_context *r300) r300->dsa_state.dirty = TRUE; } -static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if (!r300_stencilref_needed(r300)) { - sr->draw_arrays(pipe, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_end(r300); - } -} - -static void r300_stencilref_draw_range_elements( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count) +static void r300_stencilref_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info) { struct r300_context *r300 = r300_context(pipe); struct r300_stencilref_context *sr = r300->stencilref_fallback; if (!r300_stencilref_needed(r300)) { - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); + sr->draw_vbo(pipe, info); } else { r300_stencilref_begin(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); + sr->draw_vbo(pipe, info); r300_stencilref_switch_side(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); + sr->draw_vbo(pipe, info); r300_stencilref_end(r300); } } @@ -148,11 +121,9 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) { r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); - /* Save original draw functions. */ - r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; - r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + /* Save original draw function. */ + r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo; - /* Override the draw functions. */ - r300->context.draw_arrays = r300_stencilref_draw_arrays; - r300->context.draw_range_elements = r300_stencilref_draw_range_elements; + /* Override the draw function. */ + r300->context.draw_vbo = r300_stencilref_draw_vbo; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 5a11b98eb6..1e4edcdbc3 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -115,7 +115,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_DEPTH_CLAMP: return 1; /* Unsupported features (boolean caps). */ @@ -124,6 +123,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_CONT_SUPPORTED: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; /* Texturing. */ @@ -150,9 +151,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_CONST_BUFFER_SIZE: return 256; - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - return 1; - /* Fragment coordinate conventions. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: @@ -257,8 +255,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, uint32_t retval = 0; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; - boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM || - format == PIPE_FORMAT_S8_USCALED_Z24_UNORM; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_R10G10B10X2_SNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || @@ -281,11 +277,14 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, case 3: case 4: case 6: + return FALSE; +#if 0 if (usage != PIPE_BIND_RENDER_TARGET || !util_format_is_rgba8_variant( util_format_description(format))) { return FALSE; } +#endif break; default: return FALSE; @@ -293,8 +292,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check sampler format support. */ if ((usage & PIPE_BIND_SAMPLER_VIEW) && - /* Z24 cannot be sampled from on non-r5xx. */ - (is_r500 || !is_z24) && /* ATI1N is r5xx-only. */ (is_r500 || !is_ati1n) && /* ATI2N is supported on r4xx-r5xx. */ diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index edc494ff6c..13a3320b99 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -90,6 +90,8 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FALL (1 << 8) #define DBG_FB (1 << 9) #define DBG_RS_BLOCK (1 << 10) +#define DBG_CBZB (1 << 11) +#define DBG_HYPERZ (1 << 12) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h index cb7a37033f..4be23e64ce 100644 --- a/src/gallium/drivers/r300/r300_shader_semantics.h +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -38,6 +38,7 @@ struct r300_shader_semantics { int psize; int color[ATTR_COLOR_COUNT]; int bcolor[ATTR_COLOR_COUNT]; + int face; int generic[ATTR_GENERIC_COUNT]; int fog; int wpos; @@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset( info->pos = ATTR_UNUSED; info->psize = ATTR_UNUSED; + info->face = ATTR_UNUSED; info->fog = ATTR_UNUSED; info->wpos = ATTR_UNUSED; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index f52265b1c0..239edd98e3 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -25,6 +25,7 @@ #include "util/u_blitter.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" @@ -43,6 +44,7 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" +#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -446,7 +448,6 @@ static void r300_set_clip_state(struct pipe_context* pipe, r300->clip_state.dirty = TRUE; } else { - draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); } } @@ -473,14 +474,14 @@ static void* dsa->dsa = *state; - /* Depth test setup. */ + /* Depth test setup. - separate write mask depth for decomp flush */ + if (state->depth.writemask) { + dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; + } + if (state->depth.enabled) { dsa->z_buffer_control |= R300_Z_ENABLE; - if (state->depth.writemask) { - dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; - } - dsa->z_stencil_control |= (r300_translate_depth_stencil_function(state->depth.func) << R300_Z_FUNC_SHIFT); @@ -593,6 +594,7 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, UPDATE_STATE(state, r300->dsa_state); + r300->hyperz_state.dirty = TRUE; /* Will be updated before the emission. */ r300_dsa_inject_stencilref(r300); } @@ -619,7 +621,8 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, { /* Check if the macrotile flag needs to be changed. * Skip changing the flags otherwise. */ - if (tex->mip_macrotile[tex->surface_level] != tex->mip_macrotile[level]) { + if (tex->desc.macrotile[tex->surface_level] != + tex->desc.macrotile[level]) { /* Tiling determines how DRM treats the buffer data. * We must flush CS when changing it if the buffer is referenced. */ if (r300->rws->cs_is_buffer_referenced(r300->cs, @@ -627,8 +630,8 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, r300->context.flush(&r300->context, 0, NULL); r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->microtile, tex->mip_macrotile[level], - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); + tex->desc.microtile, tex->desc.macrotile[level], + tex->desc.stride_in_bytes[0]); tex->surface_level = level; } @@ -670,8 +673,10 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, surf->zslice, surf->face, surf->level, util_format_short_name(surf->format), - rtex->macrotile ? "YES" : " NO", rtex->microtile ? "YES" : " NO", - rtex->hwpitch[0], tex->width0, tex->height0, tex->depth0, + rtex->desc.macrotile[0] ? "YES" : " NO", + rtex->desc.microtile ? "YES" : " NO", + rtex->desc.stride_in_pixels[0], + tex->width0, tex->height0, tex->depth0, tex->last_level, util_format_short_name(tex->format)); } @@ -679,6 +684,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change change) { struct pipe_framebuffer_state *state = r300->fb_state.state; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300->gpu_flush.dirty = TRUE; @@ -695,8 +701,11 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, if (r300->cbzb_clear) r300->fb_state.size += 10; - else if (state->zsbuf) - r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14; + else if (state->zsbuf) { + r300->fb_state.size += 10; + if (has_hyperz) + r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + } /* The size of the rest of atoms stays the same. */ } @@ -708,8 +717,10 @@ static void struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; + int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -725,10 +736,6 @@ static void return; } - if (r300->draw) { - draw_flush(r300->draw); - } - /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { r300->blend_state.dirty = TRUE; @@ -745,20 +752,50 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - /* Polygon offset depends on the zbuffer bit depth. */ - if (state->zsbuf && r300->polygon_offset_enabled) { - switch (util_format_get_blocksize(state->zsbuf->texture->format)) { - case 2: - zbuffer_bpp = 16; - break; - case 4: - zbuffer_bpp = 24; - break; + r300->z_compression = false; + + if (state->zsbuf) { + blocksize = util_format_get_blocksize(state->zsbuf->texture->format); + switch (blocksize) { + case 2: + zbuffer_bpp = 16; + break; + case 4: + zbuffer_bpp = 24; + break; + } + if (has_hyperz) { + struct r300_surface *zs_surf = r300_surface(state->zsbuf); + struct r300_texture *tex; + int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; + int level = zs_surf->base.level; + + tex = r300_texture(zs_surf->base.texture); + + /* work out whether we can support hiz on this buffer */ + r300_hiz_alloc_block(r300, zs_surf); + + /* work out whether we can support zmask features on this buffer */ + r300_zmask_alloc_block(r300, zs_surf, compress); + + if (tex->zmask_mem[level]) { + /* compression causes hangs on 16-bit */ + if (zbuffer_bpp == 24) + r300->z_compression = compress; + } + DBG(r300, DBG_HYPERZ, + "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, + tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, + r300->z_compression, tex->zmask_mem[level] ? 1 : 0, + tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); } + /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; - r300->rs_state.dirty = TRUE; + + if (r300->polygon_offset_enabled) + r300->rs_state.dirty = TRUE; } } @@ -1016,7 +1053,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, for (i = 0; i < 8; i++) { if (state->sprite_coord_enable & (1 << i)) stuffing_enable |= - R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); + R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); } point_texcoord_left = 0.0f; @@ -1093,14 +1130,11 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) boolean last_two_sided_color = r300->two_sided_color; if (r300->draw && rs) { - draw_flush(r300->draw); draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state); } if (rs) { - r300->polygon_offset_enabled = (rs->rs.offset_point || - rs->rs.offset_line || - rs->rs.offset_tri); + r300->polygon_offset_enabled = rs->polygon_offset_enable; r300->sprite_coord_enable = rs->rs.sprite_coord_enable; r300->two_sided_color = rs->rs.light_twoside; } else { @@ -1293,7 +1327,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. */ texture = r300_texture(views[i]->texture); - if (texture->uses_pitch) { + if (texture->desc.is_npot) { r300->fs_rc_constant_state.dirty = TRUE; } @@ -1327,6 +1361,7 @@ r300_create_sampler_view(struct pipe_context *pipe, { struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); struct r300_texture *tex = r300_texture(texture); + boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; if (view) { view->base = *templ; @@ -1342,8 +1377,9 @@ r300_create_sampler_view(struct pipe_context *pipe, view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, - view->swizzle); - if (r300_screen(pipe->screen)->caps.is_r500) { + view->swizzle, + is_r500); + if (is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } } @@ -1380,7 +1416,6 @@ static void r300_set_viewport_state(struct pipe_context* pipe, r300->viewport = *state; if (r300->draw) { - draw_flush(r300->draw); draw_set_viewport_state(r300->draw, state); viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT; return; @@ -1481,7 +1516,6 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, } else { /* SW TCL. */ - draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } @@ -1500,6 +1534,23 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, r300->vertex_buffer_count = count; } +static void r300_set_index_buffer(struct pipe_context* pipe, + const struct pipe_index_buffer *ib) +{ + struct r300_context* r300 = r300_context(pipe); + + if (ib) { + pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); + memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); + } + else { + pipe_resource_reference(&r300->index_buffer.buffer, NULL); + memset(&r300->index_buffer, 0, sizeof(r300->index_buffer)); + } + + /* TODO make this more like a state */ +} + /* Initialize the PSC tables. */ static void r300_vertex_psc(struct r300_vertex_element_state *velems) { @@ -1649,7 +1700,6 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, r300->velems = velems; if (r300->draw) { - draw_flush(r300->draw); draw_set_vertex_elements(r300->draw, velems->count, velems->velem); return; } @@ -1701,10 +1751,12 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ if (r300->screen->caps.has_tcl) { + unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2; r300->vs_state.dirty = TRUE; r300->vs_state.size = vs->code.length + 9 + - (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0); + (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0) + + (vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0); if (vs->externals_count) { r300->vs_constants.dirty = TRUE; @@ -1715,7 +1767,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300->pvs_flush.dirty = TRUE; } else { - draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)vs->draw_vs); } @@ -1847,6 +1898,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_viewport_state = r300_set_viewport_state; r300->context.set_vertex_buffers = r300_set_vertex_buffers; + r300->context.set_index_buffer = r300_set_index_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 2ef9766578..4a63ed7fc1 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -46,6 +46,11 @@ enum r300_rs_swizzle { SWIZ_0001, }; +enum r300_rs_col_write_type { + WRITE_COLOR = 0, + WRITE_FACE +}; + static void r300_draw_emit_attrib(struct r300_context* r300, enum attrib_emit emit, enum interp_mode interp, @@ -203,8 +208,10 @@ static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, rs->inst[id] |= R300_RS_INST_COL_ID(id); } -static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset, + enum r300_rs_col_write_type type) { + assert(type != WRITE_COLOR); rs->inst[id] |= R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset); } @@ -213,19 +220,19 @@ static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr, enum r300_rs_swizzle swiz) { if (swiz == SWIZ_X001) { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); } else if (swiz == SWIZ_XY01) { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); } else { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_C2) | @@ -252,32 +259,36 @@ static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, rs->inst[id] |= R500_RS_INST_COL_ID(id); } -static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset, + enum r300_rs_col_write_type type) { - rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | - R500_RS_INST_COL_ADDR(fp_offset); + if (type == WRITE_FACE) + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE_BACKFACE | + R500_RS_INST_COL_ADDR(fp_offset); + else + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | + R500_RS_INST_COL_ADDR(fp_offset); + } static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, enum r300_rs_swizzle swiz) { - int rs_tex_comp = ptr*4; - if (swiz == SWIZ_X001) { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + rs->ip[id] |= R500_RS_SEL_S(ptr) | R500_RS_SEL_T(R500_RS_IP_PTR_K0) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | R500_RS_SEL_Q(R500_RS_IP_PTR_K1); } else if (swiz == SWIZ_XY01) { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | + rs->ip[id] |= R500_RS_SEL_S(ptr) | + R500_RS_SEL_T(ptr + 1) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | R500_RS_SEL_Q(R500_RS_IP_PTR_K1); } else { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | - R500_RS_SEL_R(rs_tex_comp + 2) | - R500_RS_SEL_Q(rs_tex_comp + 3); + rs->ip[id] |= R500_RS_SEL_S(ptr) | + R500_RS_SEL_T(ptr + 1) | + R500_RS_SEL_R(ptr + 2) | + R500_RS_SEL_Q(ptr + 3); } rs->inst[id] |= R500_RS_INST_TEX_ID(id); } @@ -305,9 +316,9 @@ static void r300_update_rs_block(struct r300_context *r300) struct r300_shader_semantics *vs_outputs = &vs->outputs; struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs; struct r300_rs_block rs = {0}; - int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0; + int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0; void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); - void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); + void (*rX00_rs_col_write)(struct r300_rs_block*, int, int, enum r300_rs_col_write_type); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || @@ -326,6 +337,11 @@ static void r300_update_rs_block(struct r300_context *r300) rX00_rs_tex_write = r300_rs_tex_write; } + /* 0x5555 copied from classic, which means: + * Select user color 0 for COLOR0 up to COLOR7. + * What the hell does that mean? */ + rs.vap_vtx_state_cntl = 0x5555; + /* The position is always present in VAP. */ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; @@ -352,7 +368,7 @@ static void r300_update_rs_block(struct r300_context *r300) /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { - rX00_rs_col_write(&rs, col_count, fp_offset); + rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR); fp_offset++; DBG(r300, DBG_RS, @@ -393,12 +409,31 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); tex_count++; + tex_ptr += 4; } } } + /* gl_FrontFacing. + * Note that we can use either the two-sided color selection based on + * the front and back vertex shader colors, or gl_FrontFacing, + * but not both! It locks up otherwise. + * + * In Direct3D 9, the two-sided color selection can be used + * with shaders 2.0 only, while gl_FrontFacing can be used + * with shaders 3.0 only. The hardware apparently hasn't been designed + * to support both at the same time. */ + if (r300->screen->caps.is_r500 && fs_inputs->face != ATTR_UNUSED && + !(any_bcolor_used && r300->two_sided_color)) { + rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); + rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_FACE); + fp_offset++; + col_count++; + DBG(r300, DBG_RS, "r300: Rasterized FACE written to FS.\n"); + } + /* Rasterize texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); @@ -412,7 +447,7 @@ static void r300_update_rs_block(struct r300_context *r300) } /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, + rX00_rs_tex(&rs, tex_count, tex_ptr, sprite_coord ? SWIZ_XY01 : SWIZ_XYZW); /* Write it to the FS input register if it's needed by the FS. */ @@ -429,6 +464,7 @@ static void r300_update_rs_block(struct r300_context *r300) i, sprite_coord ? " (sprite coord)" : ""); } tex_count++; + tex_ptr += sprite_coord ? 2 : 4; } else { /* Skip the FS input register, leave it uninitialized. */ /* If we try to set it to (0,0,0,1), it will lock up. */ @@ -449,7 +485,7 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_X001); /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { @@ -461,6 +497,7 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n"); } tex_count++; + tex_ptr += 4; } else { /* Skip the FS input register, leave it uninitialized. */ /* If we try to set it to (0,0,0,1), it will lock up. */ @@ -480,7 +517,7 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); /* Write it to the FS input register. */ rX00_rs_tex_write(&rs, tex_count, fp_offset); @@ -489,6 +526,7 @@ static void r300_update_rs_block(struct r300_context *r300) fp_offset++; tex_count++; + tex_ptr += 4; } /* Invalidate the rest of the no-TCL (GA) stream locations. */ @@ -507,7 +545,7 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, " "generics: %i.\n", col_count, tex_count); - rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | + rs.count = MIN2(tex_ptr, 32) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; count = MAX3(col_count, tex_count, 1); @@ -528,15 +566,9 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_sampler_state *sampler; struct r300_sampler_view *view; struct r300_texture *tex; - unsigned min_level, max_level, i, size; + unsigned min_level, max_level, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); - unsigned char depth_swizzle[4] = { - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X - }; /* The KIL opcode fix, see below. */ if (!count && !r300->screen->caps.is_r500) @@ -563,14 +595,29 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* Assign a texture cache region. */ texstate->format.format1 |= view->texcache_region; - /* If compare mode is disabled, the sampler view swizzles - * are stored in the format. - * Otherwise, swizzles must be applied after the compare mode - * in the fragment shader. */ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { + /* Depth textures are kinda special. */ + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + unsigned char depth_swizzle[4]; + + if (!r300->screen->caps.is_r500 && + util_format_get_blocksizebits(tex->desc.b.b.format) == 32) { + /* X24x8 is sampled as Y16X16 on r3xx-r4xx. + * The depth here is at the Y component. */ + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_Y; + } else { + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_X; + } + + /* If compare mode is disabled, sampler view swizzles + * are stored in the format. + * Otherwise, the swizzles must be applied after the compare + * mode in the fragment shader. */ if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { texstate->format.format1 |= - r300_get_swizzle_combined(depth_swizzle, view->swizzle); + r300_get_swizzle_combined(depth_swizzle, + view->swizzle); } else { texstate->format.format1 |= r300_get_swizzle_combined(depth_swizzle, 0); @@ -578,12 +625,12 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } /* to emulate 1D textures through 2D ones correctly */ - if (tex->b.b.target == PIPE_TEXTURE_1D) { + if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->uses_pitch) { + if (tex->desc.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; @@ -610,7 +657,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* determine min/max levels */ /* the MAX_MIP level is the largest (finest) one */ max_level = MIN3(sampler->max_lod + view->base.first_level, - tex->b.b.last_level, view->base.last_level); + tex->desc.b.b.last_level, view->base.last_level); min_level = MIN2(sampler->min_lod + view->base.first_level, max_level); texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level); @@ -665,8 +712,44 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } +/* We can't use compressed zbuffers as samplers. */ +static void r300_flush_depth_textures(struct r300_context *r300) +{ + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + unsigned i, level; + unsigned count = MIN2(state->sampler_view_count, + state->sampler_state_count); + + if (r300->z_decomp_rd) + return; + + for (i = 0; i < count; i++) + if (state->sampler_views[i] && state->sampler_states[i]) { + struct pipe_resource *tex = state->sampler_views[i]->base.texture; + + if (tex->target == PIPE_TEXTURE_3D || + tex->target == PIPE_TEXTURE_CUBE) + continue; + + /* Ignore non-depth textures. + * Also ignore reinterpreted depth textures, e.g. resource_copy. */ + if (!util_format_is_depth_or_stencil(tex->format)) + continue; + + for (level = 0; level <= tex->last_level; level++) + if (r300_texture(tex)->zmask_in_use[level]) { + /* We don't handle 3D textures and cubemaps yet. */ + r300_flush_depth_stencil(&r300->context, tex, + u_subresource(0, level), 0); + } + } +} + void r300_update_derived_state(struct r300_context* r300) { + r300_flush_depth_textures(r300); + if (r300->textures_state.dirty) { r300_merge_textures_and_samplers(r300); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 5750bc4329..da8eadd3b5 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -26,6 +26,7 @@ #include "r300_context.h" #include "r300_reg.h" +#include "r300_texture_desc.h" #include "r300_transfer.h" #include "r300_screen.h" #include "r300_winsys.h" @@ -34,14 +35,10 @@ #include "util/u_format_s3tc.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_mm.h" #include "pipe/p_screen.h" -enum r300_dim { - DIM_WIDTH = 0, - DIM_HEIGHT = 1 -}; - unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view) { @@ -109,7 +106,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, * The FORMAT specifies how the texture sampler will treat the texture, and * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view) + const unsigned char *swizzle_view, + boolean is_r500) { uint32_t result = 0; const struct util_format_description *desc; @@ -134,7 +132,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_X16; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return R500_TX_FORMAT_Y8X24; + if (is_r500) + return R500_TX_FORMAT_Y8X24; + else + return R300_TX_FORMAT_Y16X16; default: return ~0; /* Unsupported. */ } @@ -537,26 +538,27 @@ boolean r300_is_zs_format_supported(enum pipe_format format) boolean r300_is_sampler_format_supported(enum pipe_format format) { - return r300_translate_texformat(format, 0) != ~0; + return r300_translate_texformat(format, 0, TRUE) != ~0; } static void r300_texture_setup_immutable_state(struct r300_screen* screen, struct r300_texture* tex) { struct r300_texture_format_state* f = &tex->tx_format; - struct pipe_resource *pt = &tex->b.b; + struct pipe_resource *pt = &tex->desc.b.b; boolean is_r500 = screen->caps.is_r500; /* Set sampler state. */ f->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); - if (tex->uses_pitch) { + if (tex->desc.uses_stride_addressing) { /* rectangles love this */ f->format0 |= R300_TX_PITCH_EN; - f->format2 = (tex->hwpitch[0] - 1) & 0x1fff; + f->format2 = (tex->desc.stride_in_pixels[0] - 1) & 0x1fff; } else { - /* power of two textures (3D, mipmaps, and no pitch) */ + /* Power of two textures (3D, mipmaps, and no pitch), + * also NPOT textures with a width being POT. */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); } @@ -579,8 +581,8 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, } } - f->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) | - R300_TXO_MICRO_TILE(tex->microtile); + f->tile_config = R300_TXO_MACRO_TILE(tex->desc.macrotile[0]) | + R300_TXO_MICRO_TILE(tex->desc.microtile); } static void r300_texture_setup_fb_state(struct r300_screen* screen, @@ -589,23 +591,23 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, unsigned i; /* Set framebuffer state. */ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { - for (i = 0; i <= tex->b.b.last_level; i++) { + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | - R300_DEPTHMICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + R300_DEPTHMACROTILE(tex->desc.macrotile[i]) | + R300_DEPTHMICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_zsformat(tex->b.b.format); + tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format); } else { - for (i = 0; i <= tex->b.b.last_level; i++) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - r300_translate_colorformat(tex->b.b.format) | - R300_COLOR_TILE(tex->mip_macrotile[i]) | - R300_COLOR_MICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + r300_translate_colorformat(tex->desc.b.b.format) | + R300_COLOR_TILE(tex->desc.macrotile[i]) | + R300_COLOR_MICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_out_fmt(tex->b.b.format); + tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format); } } @@ -625,282 +627,6 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex)); } -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face) -{ - unsigned offset = tex->offset[level]; - - switch (tex->b.b.target) { - case PIPE_TEXTURE_3D: - assert(face == 0); - return offset + zslice * tex->layer_size[level]; - - case PIPE_TEXTURE_CUBE: - assert(zslice == 0); - return offset + face * tex->layer_size[level]; - - default: - assert(zslice == 0 && face == 0); - return offset; - } -} - -/* Returns the number of pixels that the texture should be aligned to - * in the given dimension. */ -static unsigned r300_get_pixel_alignment(struct r300_texture *tex, - enum r300_buffer_tiling macrotile, - enum r300_dim dim) -{ - static const unsigned table[2][5][3][2] = - { - { - /* Macro: linear linear linear - Micro: linear tiled square-tiled */ - {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ - {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ - {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ - {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - }, - { - /* Macro: tiled tiled tiled - Micro: linear tiled square-tiled */ - {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ - {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ - {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ - {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ - {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - } - }; - static const unsigned aa_block[2] = {4, 8}; - unsigned res = 0; - unsigned pixsize = util_format_get_blocksize(tex->b.b.format); - - assert(macrotile <= R300_BUFFER_TILED); - assert(tex->microtile <= R300_BUFFER_SQUARETILED); - assert(pixsize <= 16); - assert(dim <= DIM_HEIGHT); - - if (tex->b.b.nr_samples > 1) { - /* Multisampled textures have their own alignment scheme. */ - if (pixsize == 4) - res = aa_block[dim]; - } else { - /* Standard alignment. */ - res = table[macrotile][util_logbase2(pixsize)][tex->microtile][dim]; - } - - assert(res); - return res; -} - -/* Return true if macrotiling should be enabled on the miplevel. */ -static boolean r300_texture_macro_switch(struct r300_texture *tex, - unsigned level, - boolean rv350_mode, - enum r300_dim dim) -{ - unsigned tile, texdim; - - tile = r300_get_pixel_alignment(tex, R300_BUFFER_TILED, dim); - if (dim == DIM_WIDTH) { - texdim = u_minify(tex->b.b.width0, level); - } else { - texdim = u_minify(tex->b.b.height0, level); - } - - /* See TX_FILTER1_n.MACRO_SWITCH. */ - if (rv350_mode) { - return texdim >= tile; - } else { - return texdim > tile; - } -} - -/** - * Return the stride, in bytes, of the texture images of the given texture - * at the given level. - */ -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level) -{ - unsigned tile_width, width, stride; - - if (tex->stride_override) - return tex->stride_override; - - /* Check the level. */ - if (level > tex->b.b.last_level) { - SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", - __FUNCTION__, level, tex->b.b.last_level); - return 0; - } - - width = u_minify(tex->b.b.width0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_width = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_WIDTH); - width = align(width, tile_width); - - stride = util_format_get_stride(tex->b.b.format, width); - - /* Some IGPs need a minimum stride of 64 bytes, hmm... - * This doesn't seem to apply to tiled textures, according to r300c. */ - if (!tex->microtile && !tex->mip_macrotile[level] && - (screen->caps.family == CHIP_FAMILY_RS600 || - screen->caps.family == CHIP_FAMILY_RS690 || - screen->caps.family == CHIP_FAMILY_RS740)) { - return stride < 64 ? 64 : stride; - } - - /* The alignment to 32 bytes is sort of implied by the layout... */ - return stride; - } else { - return align(util_format_get_stride(tex->b.b.format, width), 32); - } -} - -static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, - unsigned level) -{ - unsigned height, tile_height; - - height = u_minify(tex->b.b.height0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_HEIGHT); - height = align(height, tile_height); - - /* This is needed for the kernel checker, unfortunately. */ - height = util_next_power_of_two(height); - } - - return util_format_get_nblocksy(tex->b.b.format, height); -} - -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_texture *tex) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - tex->b.b.target == PIPE_TEXTURE_3D && - tex->b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= tex->b.b.last_level; i++) { - size += r300_texture_get_stride(screen, tex, i) * - r300_texture_get_nblocksy(tex, i); - } - - size *= tex->b.b.depth0; - tex->size = size; - } -} - -static void r300_setup_miptree(struct r300_screen* screen, - struct r300_texture* tex) -{ - struct pipe_resource* base = &tex->b.b; - unsigned stride, size, layer_size, nblocksy, i; - boolean rv350_mode = screen->caps.is_rv350; - - SCREEN_DBG(screen, DBG_TEXALLOC, - "r300: Making miptree for texture, format %s\n", - util_format_short_name(base->format)); - - for (i = 0; i <= base->last_level; i++) { - /* Let's see if this miplevel can be macrotiled. */ - tex->mip_macrotile[i] = - (tex->macrotile == R300_BUFFER_TILED && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? - R300_BUFFER_TILED : R300_BUFFER_LINEAR; - - stride = r300_texture_get_stride(screen, tex, i); - nblocksy = r300_texture_get_nblocksy(tex, i); - layer_size = stride * nblocksy; - - if (base->nr_samples) { - layer_size *= base->nr_samples; - } - - if (base->target == PIPE_TEXTURE_CUBE) - size = layer_size * 6; - else - size = layer_size * u_minify(base->depth0, i); - - tex->offset[i] = tex->size; - tex->size = tex->offset[i] + size; - tex->layer_size[i] = layer_size; - tex->pitch[i] = stride / util_format_get_blocksize(base->format); - tex->hwpitch[i] = - tex->pitch[i] * util_format_get_blockwidth(base->format); - - SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride, tex->size, - tex->mip_macrotile[i] ? "TRUE" : "FALSE"); - } -} - -static void r300_setup_flags(struct r300_texture* tex) -{ - tex->uses_pitch = !util_is_power_of_two(tex->b.b.width0) || - !util_is_power_of_two(tex->b.b.height0) || - tex->stride_override; -} - -static void r300_setup_tiling(struct pipe_screen *screen, - struct r300_texture *tex) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - enum pipe_format format = tex->b.b.format; - boolean rv350_mode = r300_screen(screen)->caps.is_rv350; - boolean is_zb = util_format_is_depth_or_stencil(format); - boolean dbg_no_tiling = SCREEN_DBG_ON(r300_screen(screen), DBG_NO_TILING); - - if (!util_format_is_plain(format)) { - return; - } - - /* If height == 1, disable microtiling except for zbuffer. */ - if (!is_zb && (tex->b.b.height0 == 1 || dbg_no_tiling)) { - return; - } - - /* Set microtiling. */ - switch (util_format_get_blocksize(format)) { - case 1: - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - case 8: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - } - break; - } - - if (dbg_no_tiling) { - return; - } - - /* Set macrotiling. */ - if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { - tex->macrotile = R300_BUFFER_TILED; - } -} - static unsigned r300_texture_is_referenced(struct pipe_context *context, struct pipe_resource *texture, unsigned face, unsigned level) @@ -920,8 +646,16 @@ static void r300_texture_destroy(struct pipe_screen *screen, { struct r300_texture* tex = (struct r300_texture*)texture; struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; + int i; rws->buffer_reference(rws, &tex->buffer, NULL); + for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { + if (tex->hiz_mem[i]) + u_mmFreeMem(tex->hiz_mem[i]); + if (tex->zmask_mem[i]) + u_mmFreeMem(tex->zmask_mem[i]); + } + FREE(tex); } @@ -937,11 +671,10 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen, } return rws->buffer_get_handle(rws, tex->buffer, - r300_texture_get_stride(r300_screen(screen), tex, 0), - whandle); + tex->desc.stride_in_bytes[0], whandle); } -struct u_resource_vtbl r300_texture_vtbl = +struct u_resource_vtbl r300_texture_vtbl = { r300_texture_get_handle, /* get_handle */ r300_texture_destroy, /* resource_destroy */ @@ -954,17 +687,69 @@ struct u_resource_vtbl r300_texture_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -/* Create a new texture. */ -struct pipe_resource* r300_texture_create(struct pipe_screen* screen, - const struct pipe_resource* base) +/* The common texture constructor. */ +static struct r300_texture* +r300_texture_create_object(struct r300_screen *rscreen, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size, + struct r300_winsys_buffer *buffer) { - struct r300_texture* tex = CALLOC_STRUCT(r300_texture); - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - + struct r300_winsys_screen *rws = rscreen->rws; + struct r300_texture *tex = CALLOC_STRUCT(r300_texture); if (!tex) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + return NULL; + } + + /* Initialize the descriptor. */ + if (!r300_texture_desc_init(rscreen, &tex->desc, base, + microtile, macrotile, + stride_in_bytes_override, + max_buffer_size)) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + FREE(tex); return NULL; } + /* Initialize the hardware state. */ + r300_texture_setup_immutable_state(rscreen, tex); + r300_texture_setup_fb_state(rscreen, tex); + + tex->desc.b.vtbl = &r300_texture_vtbl; + pipe_reference_init(&tex->desc.b.b.reference, 1); + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? + R300_DOMAIN_GTT : + R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + tex->buffer = buffer; + + /* Create the backing buffer if needed. */ + if (!tex->buffer) { + tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + base->bind, base->usage, tex->domain); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + rws->buffer_set_tiling(rws, tex->buffer, + tex->desc.microtile, tex->desc.macrotile[0], + tex->desc.stride_in_bytes[0]); + + return tex; +} + +/* Create a new texture. */ +struct pipe_resource *r300_texture_create(struct pipe_screen *screen, + const struct pipe_resource *base) +{ + struct r300_screen *rscreen = r300_screen(screen); + enum r300_buffer_tiling microtile, macrotile; /* Refuse to create a texture with size 0. */ if (!base->width0 || @@ -974,58 +759,70 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, fprintf(stderr, "r300: texture_create: " "Got invalid texture dimensions: %ix%ix%i\n", base->width0, base->height0, base->depth0); - FREE(tex); return NULL; } - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; + if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) || + (base->bind & PIPE_BIND_SCANOUT)) { + microtile = R300_BUFFER_LINEAR; + macrotile = R300_BUFFER_LINEAR; + } else { + microtile = R300_BUFFER_SELECT_LAYOUT; + macrotile = R300_BUFFER_SELECT_LAYOUT; + } + + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + 0, 0, NULL); +} + +struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *base, + struct winsys_handle *whandle) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; + struct r300_screen *rscreen = r300_screen(screen); + struct r300_winsys_buffer *buffer; + enum r300_buffer_tiling microtile, macrotile; + unsigned stride, size; - r300_setup_flags(tex); - if (!(base->flags & R300_RESOURCE_FLAG_TRANSFER) && - !(base->bind & PIPE_BIND_SCANOUT)) { - r300_setup_tiling(screen, tex); + /* Support only 2D textures without mipmaps */ + if (base->target != PIPE_TEXTURE_2D || + base->depth0 != 1 || + base->last_level != 0) { + return NULL; } - r300_setup_miptree(rscreen, tex); - r300_texture_3d_fix_mipmapping(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_create: Macro: %s, Micro: %s, Pitch: %i, " - "Dim: %ix%ix%i, LastLevel: %i, Size: %i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? "YES" : " NO", - tex->hwpitch[0], - base->width0, base->height0, base->depth0, base->last_level, - tex->size, - util_format_short_name(base->format)); + buffer = rws->buffer_from_handle(rws, whandle, &stride, &size); + if (!buffer) + return NULL; - tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? - R300_DOMAIN_GTT : - R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + rws->buffer_get_tiling(rws, buffer, µtile, ¯otile); - tex->buffer = rws->buffer_create(rws, tex->size, 2048, base->bind, - base->usage, tex->domain); + /* Enforce a microtiled zbuffer. */ + if (util_format_is_depth_or_stencil(base->format) && + microtile == R300_BUFFER_LINEAR) { + switch (util_format_get_blocksize(base->format)) { + case 4: + microtile = R300_BUFFER_TILED; + break; - if (!tex->buffer) { - FREE(tex); - return NULL; + case 2: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) + microtile = R300_BUFFER_SQUARETILED; + break; + } } - rws->buffer_set_tiling(rws, tex->buffer, - tex->microtile, tex->macrotile, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); - - return (struct pipe_resource*)tex; + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + stride, size, buffer); } /* Not required to implement u_resource_vtbl, consider moving to another file: */ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, - struct pipe_resource* texture, + struct pipe_resource* texture, unsigned face, unsigned level, unsigned zslice, @@ -1035,7 +832,7 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct r300_surface* surface = CALLOC_STRUCT(r300_surface); if (surface) { - uint32_t stride, offset, tile_height; + uint32_t offset, tile_height; pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); @@ -1054,23 +851,29 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, if (surface->domain & R300_DOMAIN_VRAM) surface->domain &= ~R300_DOMAIN_GTT; - surface->offset = r300_texture_get_offset(tex, level, zslice, face); + surface->offset = r300_texture_get_offset(&tex->desc, + level, zslice, face); surface->pitch = tex->fb_state.pitch[level]; surface->format = tex->fb_state.format; /* Parameters for the CBZB clear. */ + surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; surface->cbzb_width = align(surface->base.width, 64); /* Height must be aligned to the size of a tile. */ - tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], + tile_height = r300_get_pixel_alignment(tex->desc.b.b.format, + tex->desc.b.b.nr_samples, + tex->desc.microtile, + tex->desc.macrotile[level], DIM_HEIGHT); + surface->cbzb_height = align((surface->base.height + 1) / 2, tile_height); /* Offset must be aligned to 2K and must point at the beginning * of a scanline. */ - stride = r300_texture_get_stride(r300_screen(screen), tex, level); - offset = surface->offset + stride * surface->cbzb_height; + offset = surface->offset + + tex->desc.stride_in_bytes[level] * surface->cbzb_height; surface->cbzb_midpoint_offset = offset & ~2047; surface->cbzb_pitch = surface->pitch & 0x1ffffc; @@ -1080,11 +883,11 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, else surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z; - SCREEN_DBG(r300_screen(screen), DBG_TEX, + SCREEN_DBG(r300_screen(screen), DBG_CBZB, "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n", surface->cbzb_width, surface->cbzb_height, offset & 2047, - tex->mip_macrotile[level] ? "YES" : " NO"); + tex->desc.macrotile[level] ? "YES" : " NO"); } return &surface->base; @@ -1097,88 +900,3 @@ void r300_tex_surface_destroy(struct pipe_surface* s) pipe_resource_reference(&s->texture, NULL); FREE(s); } - -struct pipe_resource* -r300_texture_from_handle(struct pipe_screen* screen, - const struct pipe_resource* base, - struct winsys_handle *whandle) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_buffer *buffer; - struct r300_texture* tex; - unsigned stride; - boolean override_zb_flags; - - /* Support only 2D textures without mipmaps */ - if (base->target != PIPE_TEXTURE_2D || - base->depth0 != 1 || - base->last_level != 0) { - return NULL; - } - - buffer = rws->buffer_from_handle(rws, whandle, &stride); - if (!buffer) { - return NULL; - } - - tex = CALLOC_STRUCT(r300_texture); - if (!tex) { - return NULL; - } - - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; - tex->domain = R300_DOMAIN_VRAM; - - tex->stride_override = stride; - - /* one ref already taken */ - tex->buffer = buffer; - - rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); - r300_setup_flags(tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_from_handle: Macro: %s, Micro: %s, " - "Pitch: % 4i, Dim: %ix%i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? "YES" : " NO", - stride / util_format_get_blocksize(base->format), - base->width0, base->height0, - util_format_short_name(base->format)); - - /* Enforce microtiled zbuffer. */ - override_zb_flags = util_format_is_depth_or_stencil(base->format) && - tex->microtile == R300_BUFFER_LINEAR; - - if (override_zb_flags) { - switch (util_format_get_blocksize(base->format)) { - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - break; - } - /* Pass through. */ - - default: - override_zb_flags = FALSE; - } - } - - r300_setup_miptree(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - - if (override_zb_flags) { - rws->buffer_set_tiling(rws, tex->buffer, - tex->microtile, tex->macrotile, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format)); - } - return (struct pipe_resource*)tex; -} diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 99e7694254..a4524320fd 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,16 +35,11 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view); uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view); + const unsigned char *swizzle_view, + boolean is_r500); uint32_t r500_tx_format_msb_bit(enum pipe_format format); -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level); - -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face); - void r300_texture_reinterpret_format(struct pipe_screen *screen, struct pipe_resource *tex, enum pipe_format new_format); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c new file mode 100644 index 0000000000..5d690e8c33 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -0,0 +1,479 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_texture_desc.h" + +#include "r300_context.h" +#include "r300_winsys.h" + +#include "util/u_format.h" + +/* Returns the number of pixels that the texture should be aligned to + * in the given dimension. */ +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim) +{ + static const unsigned table[2][5][3][2] = + { + { + /* Macro: linear linear linear + Micro: linear tiled square-tiled */ + {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ + {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + }, + { + /* Macro: tiled tiled tiled + Micro: linear tiled square-tiled */ + {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ + {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ + {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ + {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + } + }; + static const unsigned aa_block[2] = {4, 8}; + unsigned tile = 0; + unsigned pixsize = util_format_get_blocksize(format); + + assert(macrotile <= R300_BUFFER_TILED); + assert(microtile <= R300_BUFFER_SQUARETILED); + assert(pixsize <= 16); + assert(dim <= DIM_HEIGHT); + + if (num_samples > 1) { + /* Multisampled textures have their own alignment scheme. */ + if (pixsize == 4) + tile = aa_block[dim]; + /* XXX FP16 AA. */ + } else { + /* Standard alignment. */ + tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; + } + + assert(tile); + return tile; +} + +/* Return true if macrotiling should be enabled on the miplevel. */ +static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, + unsigned level, + boolean rv350_mode, + enum r300_dim dim) +{ + unsigned tile, texdim; + + tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, + desc->microtile, R300_BUFFER_TILED, dim); + if (dim == DIM_WIDTH) { + texdim = u_minify(desc->b.b.width0, level); + } else { + texdim = u_minify(desc->b.b.height0, level); + } + + /* See TX_FILTER1_n.MACRO_SWITCH. */ + if (rv350_mode) { + return texdim >= tile; + } else { + return texdim > tile; + } +} + +/** + * Return the stride, in bytes, of the texture image of the given texture + * at the given level. + */ +static unsigned r300_texture_get_stride(struct r300_screen *screen, + struct r300_texture_desc *desc, + unsigned level) +{ + unsigned tile_width, width, stride; + + if (desc->stride_in_bytes_override) + return desc->stride_in_bytes_override; + + /* Check the level. */ + if (level > desc->b.b.last_level) { + SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", + __FUNCTION__, level, desc->b.b.last_level); + return 0; + } + + width = u_minify(desc->b.b.width0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_width = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_WIDTH); + width = align(width, tile_width); + + stride = util_format_get_stride(desc->b.b.format, width); + + /* Some IGPs need a minimum stride of 64 bytes, hmm... */ + if (!desc->macrotile[level] && + (screen->caps.family == CHIP_FAMILY_RS600 || + screen->caps.family == CHIP_FAMILY_RS690 || + screen->caps.family == CHIP_FAMILY_RS740)) { + unsigned min_stride; + + if (desc->microtile) { + unsigned tile_height = + r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_HEIGHT); + + min_stride = 64 / tile_height; + } else { + min_stride = 64; + } + + return stride < min_stride ? min_stride : stride; + } + + /* The alignment to 32 bytes is sort of implied by the layout... */ + return stride; + } else { + return align(util_format_get_stride(desc->b.b.format, width), 32); + } +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, + unsigned level, + boolean *out_aligned_for_cbzb) +{ + unsigned height, tile_height; + + height = u_minify(desc->b.b.height0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_height = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_HEIGHT); + height = align(height, tile_height); + + /* This is needed for the kernel checker, unfortunately. */ + if ((desc->b.b.target != PIPE_TEXTURE_1D && + desc->b.b.target != PIPE_TEXTURE_2D) || + desc->b.b.last_level != 0) { + height = util_next_power_of_two(height); + } + + /* See if the CBZB clear can be used on the buffer, + * taking the texture size into account. */ + if (out_aligned_for_cbzb) { + if (desc->macrotile[level]) { + /* When clearing, the layer (width*height) is horizontally split + * into two, and the upper and lower halves are cleared by the CB + * and ZB units, respectively. Therefore, the number of macrotiles + * in the Y direction must be even. */ + + /* Align the height so that there is an even number of macrotiles. + * Do so for 3 or more macrotiles in the Y direction. */ + if (level == 0 && desc->b.b.last_level == 0 && + (desc->b.b.target == PIPE_TEXTURE_1D || + desc->b.b.target == PIPE_TEXTURE_2D) && + height >= tile_height * 3) { + height = align(height, tile_height * 2); + } + + *out_aligned_for_cbzb = height % (tile_height * 2) == 0; + } else { + *out_aligned_for_cbzb = FALSE; + } + } + } + + return util_format_get_nblocksy(desc->b.b.format, height); +} + +static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures + * incorrectly. This is a workaround to prevent CS from being rejected. */ + + unsigned i, size; + + if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && + desc->b.b.target == PIPE_TEXTURE_3D && + desc->b.b.last_level > 0) { + size = 0; + + for (i = 0; i <= desc->b.b.last_level; i++) { + size += desc->stride_in_bytes[i] * + r300_texture_get_nblocksy(desc, i, FALSE); + } + + size *= desc->b.b.depth0; + desc->size_in_bytes = size; + } +} + +/* Get a width in pixels from a stride in bytes. */ +static unsigned stride_to_width(enum pipe_format format, + unsigned stride_in_bytes) +{ + return (stride_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + +static void r300_setup_miptree(struct r300_screen *screen, + struct r300_texture_desc *desc, + boolean align_for_cbzb) +{ + struct pipe_resource *base = &desc->b.b; + unsigned stride, size, layer_size, nblocksy, i; + boolean rv350_mode = screen->caps.is_rv350; + boolean aligned_for_cbzb; + + desc->size_in_bytes = 0; + + SCREEN_DBG(screen, DBG_TEXALLOC, + "r300: Making miptree for texture, format %s\n", + util_format_short_name(base->format)); + + for (i = 0; i <= base->last_level; i++) { + /* Let's see if this miplevel can be macrotiled. */ + desc->macrotile[i] = + (desc->macrotile[0] == R300_BUFFER_TILED && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ? + R300_BUFFER_TILED : R300_BUFFER_LINEAR; + + stride = r300_texture_get_stride(screen, desc, i); + + /* Compute the number of blocks in Y, see if the CBZB clear can be + * used on the texture. */ + aligned_for_cbzb = FALSE; + if (align_for_cbzb && desc->cbzb_allowed[i]) + nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb); + else + nblocksy = r300_texture_get_nblocksy(desc, i, NULL); + + layer_size = stride * nblocksy; + + if (base->nr_samples) { + layer_size *= base->nr_samples; + } + + if (base->target == PIPE_TEXTURE_CUBE) + size = layer_size * 6; + else + size = layer_size * u_minify(base->depth0, i); + + desc->offset_in_bytes[i] = desc->size_in_bytes; + desc->size_in_bytes = desc->offset_in_bytes[i] + size; + desc->layer_size_in_bytes[i] = layer_size; + desc->stride_in_bytes[i] = stride; + desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); + desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb; + + SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", + i, u_minify(base->width0, i), u_minify(base->height0, i), + u_minify(base->depth0, i), stride, desc->size_in_bytes, + desc->macrotile[i] ? "TRUE" : "FALSE"); + } +} + +static void r300_setup_flags(struct r300_texture_desc *desc) +{ + desc->uses_stride_addressing = + !util_is_power_of_two(desc->b.b.width0) || + !util_is_power_of_two(desc->b.b.height0) || + (desc->stride_in_bytes_override && + stride_to_width(desc->b.b.format, + desc->stride_in_bytes_override) != desc->b.b.width0); + + desc->is_npot = + desc->uses_stride_addressing || + !util_is_power_of_two(desc->b.b.height0); +} + +static void r300_setup_cbzb_flags(struct r300_screen *rscreen, + struct r300_texture_desc *desc) +{ + unsigned i, bpp; + boolean first_level_valid; + + bpp = util_format_get_blocksizebits(desc->b.b.format); + + /* 1) The texture must be point-sampled, + * 2) The depth must be 16 or 32 bits. + * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage + * with certain texture sizes. Macrotiling ensures the alignment. */ + first_level_valid = desc->b.b.nr_samples <= 1 && + (bpp == 16 || bpp == 32) && + desc->macrotile[0]; + + for (i = 0; i <= desc->b.b.last_level; i++) + desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; +} + +static void r300_setup_tiling(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + struct r300_winsys_screen *rws = screen->rws; + enum pipe_format format = desc->b.b.format; + boolean rv350_mode = screen->caps.is_rv350; + boolean is_zb = util_format_is_depth_or_stencil(format); + boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + + if (!util_format_is_plain(format)) { + return; + } + + /* If height == 1, disable microtiling except for zbuffer. */ + if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) { + return; + } + + /* Set microtiling. */ + switch (util_format_get_blocksize(format)) { + case 1: + case 4: + desc->microtile = R300_BUFFER_TILED; + break; + + case 2: + case 8: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { + desc->microtile = R300_BUFFER_SQUARETILED; + } + break; + } + + if (dbg_no_tiling) { + return; + } + + /* Set macrotiling. */ + if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) { + desc->macrotile[0] = R300_BUFFER_TILED; + } +} + +static void r300_tex_print_info(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const char *func) +{ + fprintf(stderr, + "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " + "LastLevel: %i, Size: %i, Format: %s\n", + func, + desc->macrotile[0] ? "YES" : " NO", + desc->microtile ? "YES" : " NO", + desc->stride_in_pixels[0], + desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0, + desc->b.b.last_level, desc->size_in_bytes, + util_format_short_name(desc->b.b.format)); +} + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size) +{ + desc->b.b = *base; + desc->b.b.screen = &rscreen->screen; + + desc->stride_in_bytes_override = stride_in_bytes_override; + + if (microtile == R300_BUFFER_SELECT_LAYOUT || + macrotile == R300_BUFFER_SELECT_LAYOUT) { + r300_setup_tiling(rscreen, desc); + } else { + desc->microtile = microtile; + desc->macrotile[0] = macrotile; + assert(desc->b.b.last_level == 0); + } + + r300_setup_flags(desc); + r300_setup_cbzb_flags(rscreen, desc); + + /* Setup the miptree description. */ + r300_setup_miptree(rscreen, desc, TRUE); + /* If the required buffer size is larger the given max size, + * try again without the alignment for the CBZB clear. */ + if (max_buffer_size && desc->size_in_bytes > max_buffer_size) { + r300_setup_miptree(rscreen, desc, FALSE); + } + + r300_texture_3d_fix_mipmapping(rscreen, desc); + + if (max_buffer_size) { + /* Make sure the buffer we got is large enough. */ + if (desc->size_in_bytes > max_buffer_size) { + fprintf(stderr, "r300: texture_from_handle: The buffer is not " + "large enough. Got: %i, Need: %i, Info:\n", + max_buffer_size, desc->size_in_bytes); + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + return FALSE; + } + + desc->buffer_size_in_bytes = max_buffer_size; + } else { + desc->buffer_size_in_bytes = desc->size_in_bytes; + } + + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + + return TRUE; +} + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face) +{ + unsigned offset = desc->offset_in_bytes[level]; + + switch (desc->b.b.target) { + case PIPE_TEXTURE_3D: + assert(face == 0); + return offset + zslice * desc->layer_size_in_bytes[level]; + + case PIPE_TEXTURE_CUBE: + assert(zslice == 0); + return offset + face * desc->layer_size_in_bytes[level]; + + default: + assert(zslice == 0 && face == 0); + return offset; + } +} diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h new file mode 100644 index 0000000000..95de66f654 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2010 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_TEXTURE_DESC_H +#define R300_TEXTURE_DESC_H + +#include "r300_defines.h" + +struct pipe_resource; +struct r300_screen; +struct r300_texture_desc; +struct r300_texture; + +enum r300_dim { + DIM_WIDTH = 0, + DIM_HEIGHT = 1 +}; + +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim); + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size); + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face); + +#endif diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 51b2c55550..dd697b9c37 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -126,7 +126,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */ /* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */ /* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */ - /* case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; */ + case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; /* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */ /* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */ /* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */ diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 3cc4c8c958..e9333b35ef 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -22,7 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_transfer.h" -#include "r300_texture.h" +#include "r300_texture_desc.h" #include "r300_screen_buffer.h" #include "util/u_memory.h" @@ -35,8 +35,8 @@ struct r300_transfer { /* Offset from start of buffer. */ unsigned offset; - /* Detiled texture. */ - struct r300_texture *detiled_texture; + /* Linear texture. */ + struct r300_texture *linear_texture; }; /* Convenience cast wrapper. */ @@ -57,7 +57,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, subdst.face = 0; subdst.level = 0; - ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst, + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, subdst, 0, 0, 0, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, @@ -77,7 +77,7 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, ctx->resource_copy_region(ctx, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->detiled_texture->b.b, subsrc, + &r300transfer->linear_texture->desc.b.b, subsrc, 0, 0, 0, transfer->box.width, transfer->box.height); @@ -93,7 +93,6 @@ r300_texture_get_transfer(struct pipe_context *ctx, { struct r300_context *r300 = r300_context(ctx); struct r300_texture *tex = r300_texture(texture); - struct r300_screen *r300screen = r300_screen(ctx->screen); struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; @@ -124,7 +123,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. */ - if (tex->microtile || tex->macrotile || + if (tex->desc.microtile || tex->desc.macrotile[sr.level] || ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { base.target = PIPE_TEXTURE_2D; base.format = texture->format; @@ -149,23 +148,23 @@ r300_texture_get_transfer(struct pipe_context *ctx, } /* Create the temporary texture. */ - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* Oh crap, the thing can't create the texture. * Let's flush and try again. */ ctx->flush(ctx, 0, NULL); - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->microtile && !tex->macrotile) { + if (!tex->desc.microtile && !tex->desc.macrotile[sr.level]) { goto unpipelined; } @@ -177,8 +176,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, } } - assert(!trans->detiled_texture->microtile && - !trans->detiled_texture->macrotile); + assert(!trans->linear_texture->desc.microtile && + !trans->linear_texture->desc.macrotile[0]); /* Set the stride. * @@ -188,7 +187,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, * right thing internally. */ trans->transfer.stride = - r300_texture_get_stride(r300screen, trans->detiled_texture, 0); + trans->linear_texture->desc.stride_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is @@ -203,9 +202,9 @@ r300_texture_get_transfer(struct pipe_context *ctx, unpipelined: /* Unpipelined transfer. */ - trans->transfer.stride = - r300_texture_get_stride(r300screen, tex, sr.level); - trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face); + trans->transfer.stride = tex->desc.stride_in_bytes[sr.level]; + trans->offset = r300_texture_get_offset(&tex->desc, + sr.level, box->z, sr.face); if (referenced_cs) ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); @@ -219,13 +218,13 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx, { struct r300_transfer *r300transfer = r300_transfer(trans); - if (r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { if (trans->usage & PIPE_TRANSFER_WRITE) { r300_copy_into_tiled_texture(ctx, r300transfer); } pipe_resource_reference( - (struct pipe_resource**)&r300transfer->detiled_texture, NULL); + (struct pipe_resource**)&r300transfer->linear_texture, NULL); } pipe_resource_reference(&trans->resource, NULL); FREE(trans); @@ -239,13 +238,13 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); char *map; - enum pipe_format format = tex->b.b.format; + enum pipe_format format = tex->desc.b.b.format; - if (r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ return rws->buffer_map(rws, - r300transfer->detiled_texture->buffer, + r300transfer->linear_texture->buffer, r300->cs, transfer->usage); } else { @@ -270,8 +269,8 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); - if (r300transfer->detiled_texture) { - rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer); + if (r300transfer->linear_texture) { + rws->buffer_unmap(rws, r300transfer->linear_texture->buffer); } else { rws->buffer_unmap(rws, tex->buffer); } diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index b25c786d6b..54c8de1241 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -207,7 +207,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, compiler.Base.max_temp_regs = 32; if (compiler.Base.Debug) { - debug_printf("r300: Initial vertex program\n"); + DBG(r300, DBG_VP, "r300: Initial vertex program\n"); tgsi_dump(vs->state.tokens, 0); } @@ -227,8 +227,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { - /* XXX We should fallback using Draw. */ - fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader" + DBG(r300, DBG_VP, "r300 VP: Compiler error:\n%sUsing a dummy shader" " instead.\nIf there's an 'unknown opcode' message, please" " file a bug report and attach this log.\n", compiler.Base.ErrorMsg); diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 7e115c2d62..187780750f 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -49,6 +49,8 @@ enum r300_value_id { R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, R300_VID_DRM_2_3_0, + R300_VID_DRM_2_6_0, + R300_CAN_HYPERZ, }; enum r300_reference_domain { /* bitfield */ @@ -184,12 +186,13 @@ struct r300_winsys_screen { * \param ws The winsys this function is called from. * \param whandle A winsys handle pointer as was received from a state * tracker. - * \param stride A pointer to the stride return variable. - * The stride is in bytes. + * \param stride The returned buffer stride in bytes. + * \param size The returned buffer size. */ struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws, struct winsys_handle *whandle, - unsigned *stride); + unsigned *stride, + unsigned *size); /** * Get a winsys handle from a winsys buffer. The internal structure |