Diffstat (limited to 'src/gallium/drivers/r300')
37 files changed, 2538 insertions, 2595 deletions
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 6e886433bc..37b635fd12 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,7 +22,6 @@ #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_texture.h" #include "r300_winsys.h" @@ -55,11 +54,12 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o util_blitter_save_viewport(r300->blitter, &r300->viewport); util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state); util_blitter_save_vertex_elements(r300->blitter, r300->velems); - util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count, - r300->vertex_buffer); + util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers, + r300->vbuf_mgr->vertex_buffer); - if (op & (R300_CLEAR_SURFACE | R300_COPY)) + if (op & (R300_CLEAR_SURFACE | R300_COPY)) { util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); + } if (op & R300_COPY) { struct r300_textures_state* state = @@ -108,6 +108,22 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, return r300_surface(fb->cbufs[0])->cbzb_allowed; } +static boolean r300_fast_zclear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level]; +} + +static boolean r300_hiz_clear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level]; +} + static uint32_t r300_depth_clear_value(enum pipe_format format, double depth, unsigned stencil) { @@ -125,6 +141,13 @@ static uint32_t r300_depth_clear_value(enum pipe_format format, } } +static uint32_t r300_hiz_clear_value(double depth) +{ + uint32_t r = (uint32_t)(CLAMP(depth, 0, 1) * 255.5); + assert(r <= 255); + return r | (r << 8) | (r << 16) | (r << 24); +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -132,37 +155,46 @@ static void r300_clear(struct pipe_context* pipe, double depth, unsigned stencil) { - /* My notes about fastfill: + /* My notes about Zbuffer compression: * - * 1) Only the zbuffer is cleared. + * 1) The zbuffer must be micro-tiled and whole microtiles must be + * written if compression is enabled. If microtiling is disabled, + * it locks up. * - * 2) The zbuffer must be micro-tiled and whole microtiles must be - * written. If microtiling is disabled, it locks up. + * 2) There is ZMASK RAM which contains a compressed zbuffer. + * Each dword of the Z Mask contains compression information + * for 16 4x4 pixel tiles, that is 2 bits for each tile. + * On chips with 2 Z pipes, every other dword maps to a different + * pipe. On newer chipsets, there is a new compression mode + * with 8x8 pixel tiles per 2 bits. * - * 3) There is Z Mask RAM which contains a compressed zbuffer and - * it interacts with fastfill. We should figure out how to use it - * to get more performance. - * This is what we know about the Z Mask: + * 3) The FASTFILL bit has nothing to do with filling. It only tells hw + * it should look in the ZMASK RAM first before fetching from a real + * zbuffer. 
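As a back-of-the-envelope check of note 2 above (2 bits of ZMASK per 4x4 or 8x8 tile, hence 16 tiles per dword), the required ZMASK size can be sketched as below. The helper is illustrative only; it ignores the per-pipe interleaving and whatever alignment the driver actually applies when filling tex.zmask_dwords.

/* Illustrative sketch, not driver code: rough ZMASK size in dwords for a
 * width x height zbuffer, with tile = 4 (R300_ZCOMP_4X4) or 8 (R300_ZCOMP_8X8). */
static unsigned zmask_dwords_estimate(unsigned width, unsigned height,
                                      unsigned tile)
{
    unsigned tiles_x = (width + tile - 1) / tile;
    unsigned tiles_y = (height + tile - 1) / tile;

    /* 2 bits per tile -> 16 tiles per dword. */
    return (tiles_x * tiles_y + 15) / 16;
}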
* - * Each dword of the Z Mask contains compression information - * for 16 4x4 pixel blocks, that is 2 bits for each block. - * On chips with 2 Z pipes, every other dword maps to a different - * pipe. + * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned + * during zbuffer reads instead of the value that is actually stored + * in the zbuffer memory. A pixel is in a cleared state when its ZMASK + * is equal to 0. Therefore, if you clear ZMASK with zeros, you may + * leave the zbuffer memory uninitialized, but then you must enable + * compression, so that the ZMASK RAM is actually used. * - * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must - * be equal to 0. (clear the Z Mask RAM with zeros) + * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed + * during zbuffer updates. A special decompressing operation should be + * used to fully decompress a zbuffer, which basically just stores all + * compressed tiles in ZMASK to the zbuffer memory. * - * 5) For 16-bit zbuffer, compression causes a hung with one or + * 6) For a 16-bit zbuffer, compression causes a hung with one or * two samples and should not be used. * - * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears + * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears * to avoid needless decompression. * - * 7) Fastfill must not be used if reading of compressed Z data is disabled + * 8) Fastfill must not be used if reading of compressed Z data is disabled * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE), * i.e. it cannot be used to compress the zbuffer. * - * 8) ZB_CB_CLEAR does not interact with fastfill in any way. + * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way. * * - Marek */ @@ -172,8 +204,6 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_texture *zstex = - fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); @@ -182,16 +212,18 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { - hyperz_dcv = hyperz->zb_depthclearvalue = - r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + if (r300_fast_zclear_allowed(r300)) { + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) { r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) + + if (r300_hiz_clear_allowed(r300)) { + r300->hiz_clear_value = r300_hiz_clear_value(depth); r300_mark_atom_dirty(r300, &r300->hiz_clear); + } } /* Enable CBZB clear. */ @@ -205,7 +237,7 @@ static void r300_clear(struct pipe_context* pipe, height = surf->cbzb_height; r300->cbzb_clear = TRUE; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Clear. 
*/ @@ -218,26 +250,28 @@ static void r300_clear(struct pipe_context* pipe, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); - } else if (r300->zmask_clear.dirty) { - /* Just clear zmask and hiz now, this does not use a standard draw + } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) { + /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ unsigned dwords; /* Calculate zmask_clear and hiz_clear atom sizes. */ r300_update_hyperz_state(r300); - dwords = r300->zmask_clear.size + + dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + r300_get_num_cs_end_dwords(r300); /* Reserve CS space. */ if (dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { - r300->context.flush(&r300->context, 0, NULL); + r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); } /* Emit clear packets. */ - r300_emit_zmask_clear(r300, r300->zmask_clear.size, - r300->zmask_clear.state); - r300->zmask_clear.dirty = FALSE; + if (r300->zmask_clear.dirty) { + r300_emit_zmask_clear(r300, r300->zmask_clear.size, + r300->zmask_clear.state); + r300->zmask_clear.dirty = FALSE; + } if (r300->hiz_clear.dirty) { r300_emit_hiz_clear(r300, r300->hiz_clear.size, r300->hiz_clear.state); @@ -251,16 +285,14 @@ static void r300_clear(struct pipe_context* pipe, if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; hyperz->zb_depthclearvalue = hyperz_dcv; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Enable fastfill and/or hiz. * * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update - * looks if zmask/hiz is in use and enables fastfill accordingly. */ - if (zstex && - (zstex->zmask_in_use[fb->zsbuf->u.tex.level] || - zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + * looks if zmask/hiz is in use and programs hardware accordingly. */ + if (r300->zmask_in_use || r300->hiz_in_use) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -274,10 +306,16 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); + r300->hyperz_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_render_target(r300->blitter, dst, rgba, dstx, dsty, width, height); r300_blitter_end(r300); + + r300->hyperz_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } /* Clear a region of a depth stencil surface. */ @@ -290,42 +328,70 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, unsigned width, unsigned height) { struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + if (r300->zmask_in_use && !r300->hyperz_locked) { + if (fb->zsbuf->texture == dst->texture) { + r300_decompress_zmask(r300); + } else { + r300->hyperz_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); r300_blitter_end(r300); + + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } } -/* Flush a depth stencil buffer. 
*/ -void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned level, - unsigned layer) +void r300_decompress_zmask(struct r300_context *r300) { - struct r300_context *r300 = r300_context(pipe); - struct pipe_surface *dstsurf, surf_tmpl; - struct r300_texture *tex = r300_texture(dst); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; - if (!tex->zmask_mem[level]) - return; - if (!tex->zmask_in_use[level]) + if (!r300->zmask_in_use || r300->hyperz_locked) return; - surf_tmpl.format = dst->format; - surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - surf_tmpl.u.tex.level = level; - surf_tmpl.u.tex.first_layer = layer; - surf_tmpl.u.tex.last_layer = layer; - dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl); + r300->zmask_decompress = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); - r300->z_decomp_rd = TRUE; - r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_begin(r300, R300_CLEAR); + util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0, + r300->dsa_decompress_zmask); r300_blitter_end(r300); - r300->z_decomp_rd = FALSE; - tex->zmask_in_use[level] = FALSE; + r300->zmask_decompress = FALSE; + r300->zmask_in_use = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); +} + +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) +{ + struct pipe_framebuffer_state fb = {0}; + fb.width = r300->locked_zbuffer->width; + fb.height = r300->locked_zbuffer->height; + fb.nr_cbufs = 0; + fb.zsbuf = r300->locked_zbuffer; + + r300->context.set_framebuffer_state(&r300->context, &fb); + r300_decompress_zmask(r300); +} + +void r300_decompress_zmask_locked(struct r300_context *r300) +{ + struct pipe_framebuffer_state saved_fb = {0}; + + util_copy_framebuffer_state(&saved_fb, r300->fb_state.state); + r300_decompress_zmask_locked_unsafe(r300); + r300->context.set_framebuffer_state(&r300->context, &saved_fb); + util_unreference_framebuffer_state(&saved_fb); } /* Copy a block of pixels from one surface to another using HW. 
*/ @@ -340,8 +406,6 @@ static void r300_hw_copy_region(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_begin(r300, R300_COPY); - - /* Do a copy */ util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box, TRUE); r300_blitter_end(r300); @@ -356,54 +420,103 @@ static void r300_resource_copy_region(struct pipe_context *pipe, unsigned src_level, const struct pipe_box *src_box) { - enum pipe_format old_format = dst->format; - enum pipe_format new_format = old_format; - boolean is_depth; - if (!pipe->screen->is_format_supported(pipe->screen, - old_format, src->target, - src->nr_samples, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SAMPLER_VIEW, 0) && - util_format_is_plain(old_format)) { - switch (util_format_get_blocksize(old_format)) { + struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct pipe_resource old_src = *src; + struct pipe_resource old_dst = *dst; + struct pipe_resource new_src = old_src; + struct pipe_resource new_dst = old_dst; + const struct util_format_description *desc = + util_format_description(dst->format); + struct pipe_box box; + + if (r300->zmask_in_use && !r300->hyperz_locked) { + if (fb->zsbuf->texture == src || + fb->zsbuf->texture == dst) { + r300_decompress_zmask(r300); + } else { + r300->hyperz_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } + + /* Handle non-renderable plain formats. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && + (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || + !pipe->screen->is_format_supported(pipe->screen, + src->format, src->target, + src->nr_samples, + PIPE_BIND_SAMPLER_VIEW) || + !pipe->screen->is_format_supported(pipe->screen, + dst->format, dst->target, + dst->nr_samples, + PIPE_BIND_RENDER_TARGET))) { + switch (util_format_get_blocksize(old_dst.format)) { case 1: - new_format = PIPE_FORMAT_I8_UNORM; + new_dst.format = PIPE_FORMAT_I8_UNORM; break; case 2: - new_format = PIPE_FORMAT_B4G4R4A4_UNORM; + new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM; break; case 4: - new_format = PIPE_FORMAT_B8G8R8A8_UNORM; + new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; break; case 8: - new_format = PIPE_FORMAT_R16G16B16A16_UNORM; + new_dst.format = PIPE_FORMAT_R16G16B16A16_UNORM; break; default: debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n" "r300: surface_copy: Software fallback doesn't work for tiled textures.\n", - util_format_short_name(old_format)); + util_format_short_name(dst->format)); } + new_src.format = new_dst.format; } - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - if (is_depth) { - r300_flush_depth_stencil(pipe, src, src_level, src_box->z); - } - if (old_format != new_format) { - r300_texture_reinterpret_format(pipe->screen, - dst, new_format); - r300_texture_reinterpret_format(pipe->screen, - src, new_format); + /* Handle compressed formats. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC || + desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + switch (util_format_get_blocksize(old_dst.format)) { + case 8: + /* 1 pixel = 4 bits, + * we set 1 pixel = 2 bytes ===> 4 times larger pixels. */ + new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM; + break; + case 16: + /* 1 pixel = 8 bits, + * we set 1 pixel = 4 bytes ===> 4 times larger pixels. 
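To see why only the height and the y coordinates are scaled below: a row of 8-byte 4x4 blocks covering W source pixels occupies (W / 4) * 8 = 2 * W bytes, which reinterpreted as B4G4R4A4 (2 bytes per texel) is exactly W texels wide, yet it spans 4 source rows; so the width stays untouched while height, y and box.height are divided by 4. The 16-byte block case works out identically with B8G8R8A8 (4 bytes per texel).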
*/ + new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + } + + /* Since the pixels are 4 times larger, we must decrease + * the image size and the coordinates 4 times. */ + new_src.format = new_dst.format; + new_dst.height0 = (new_dst.height0 + 3) / 4; + new_src.height0 = (new_src.height0 + 3) / 4; + dsty /= 4; + box = *src_box; + box.y /= 4; + box.height = (box.height + 3) / 4; + src_box = &box; } + if (old_src.format != new_src.format) + r300_resource_set_properties(pipe->screen, src, 0, &new_src); + if (old_dst.format != new_dst.format) + r300_resource_set_properties(pipe->screen, dst, 0, &new_dst); + r300_hw_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); - if (old_format != new_format) { - r300_texture_reinterpret_format(pipe->screen, - dst, old_format); - r300_texture_reinterpret_format(pipe->screen, - src, old_format); + if (old_src.format != new_src.format) + r300_resource_set_properties(pipe->screen, src, 0, &old_src); + if (old_dst.format != new_dst.format) + r300_resource_set_properties(pipe->screen, dst, 0, &old_dst); + + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } } diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h index 9d3d4fc1b1..b373937a1f 100644 --- a/src/gallium/drivers/r300/r300_cb.h +++ b/src/gallium/drivers/r300/r300_cb.h @@ -61,40 +61,52 @@ * that they neatly hide away, and don't have the cost of function setup, so * we're going to use them. */ -#ifdef DEBUG -#define CB_DEBUG(x) x -#else -#define CB_DEBUG(x) -#endif - - /** * Command buffer setup. */ +#ifdef DEBUG + #define CB_LOCALS \ - CB_DEBUG(int cs_count = 0;) \ + int cs_count = 0; \ uint32_t *cs_ptr = NULL; \ - CB_DEBUG((void) cs_count;) (void) cs_ptr; + (void) cs_count; (void) cs_ptr -#define NEW_CB(ptr, size) do { \ - assert(sizeof(*ptr) == sizeof(uint32_t)); \ - cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ - CB_DEBUG(cs_count = size;) \ +#define BEGIN_CB(ptr, size) do { \ + assert(sizeof(*(ptr)) == sizeof(uint32_t)); \ + cs_count = (size); \ + cs_ptr = (ptr); \ } while (0) -#define BEGIN_CB(ptr, size) do { \ - assert(sizeof(*ptr) == sizeof(uint32_t)); \ - cs_ptr = ptr; \ - CB_DEBUG(cs_count = size;) \ +#define NEW_CB(ptr, size) \ + do { \ + assert(sizeof(*(ptr)) == sizeof(uint32_t)); \ + cs_count = (size); \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ } while (0) #define END_CB do { \ - CB_DEBUG(if (cs_count != 0) \ + if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ - cs_count, __FUNCTION__, __FILE__, __LINE__);) \ + cs_count, __FUNCTION__, __FILE__, __LINE__); \ } while (0) +#define CB_USED_DW(x) cs_count -= x + +#else + +#define CB_LOCALS \ + uint32_t *cs_ptr = NULL; (void) cs_ptr + +#define NEW_CB(ptr, size) \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)) + +#define BEGIN_CB(ptr, size) cs_ptr = (ptr) +#define END_CB +#define CB_USED_DW(x) + +#endif + /** * Storing pure DWORDs. 
@@ -103,13 +115,13 @@ #define OUT_CB(value) do { \ *cs_ptr = (value); \ cs_ptr++; \ - CB_DEBUG(cs_count--;) \ + CB_USED_DW(1); \ } while (0) #define OUT_CB_TABLE(values, count) do { \ memcpy(cs_ptr, values, count * sizeof(uint32_t)); \ cs_ptr += count; \ - CB_DEBUG(cs_count -= count;) \ + CB_USED_DW(count); \ } while (0) #define OUT_CB_32F(value) \ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 583e981a4d..990acea9f4 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -241,7 +241,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5A42: caps->family = CHIP_FAMILY_RS400; caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A61: @@ -256,8 +255,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x793F: @@ -266,8 +263,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x796C: @@ -277,8 +272,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7100: @@ -366,7 +359,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; - /*caps->hiz_ram = RV530_HIZ_LIMIT;*/ + caps->hiz_ram = RV530_HIZ_LIMIT; caps->zmask_ram = PIPE_ZMASK_SIZE; break; @@ -424,5 +417,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) } caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; + caps->z_compress = caps->is_rv350 ? R300_ZCOMP_8X8 : R300_ZCOMP_4X4; caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index f2035d2009..68943d561b 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -33,6 +33,13 @@ #define PIPE_ZMASK_SIZE 4096 #define RV3xx_ZMASK_SIZE 5120 +/* The size of a compressed tile. Each compressed tile takes 2 bits + * in the ZMASK RAM, so there is always 16 tiles per one dword. */ +enum r300_zmask_compression { + R300_ZCOMP_4X4 = 4, + R300_ZCOMP_8X8 = 8 +}; + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -50,10 +57,12 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + /* Some chipsets do not have HiZ RAM - other have varying amounts. */ int hiz_ram; - /* some chipsets have zmask ram per pipe some don't */ + /* Some chipsets have zmask ram per pipe some don't. */ int zmask_ram; + /* Compression mode for ZMASK. */ + enum r300_zmask_compression z_compress; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds @@ -81,8 +90,6 @@ struct r300_capabilities { boolean high_second_pipe; /* DXTC texture swizzling. */ boolean dxtc_swizzle; - /* Index bias (AKA index offset). 
*/ - boolean index_bias_supported; }; /* Enumerations for legibility and telling which card we're running on. */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 67b011a145..720d666d98 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,29 +30,28 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" -#include <inttypes.h> - static void r300_update_num_contexts(struct r300_screen *r300screen, int diff) { + pipe_mutex_lock(r300screen->num_contexts_mutex); if (diff > 0) { - p_atomic_inc(&r300screen->num_contexts); + r300screen->num_contexts++; if (r300screen->num_contexts > 1) util_slab_set_thread_safety(&r300screen->pool_buffers, UTIL_SLAB_MULTITHREADED); } else { - p_atomic_dec(&r300screen->num_contexts); + r300screen->num_contexts--; if (r300screen->num_contexts <= 1) util_slab_set_thread_safety(&r300screen->pool_buffers, UTIL_SLAB_SINGLETHREADED); } + pipe_mutex_unlock(r300screen->num_contexts_mutex); } static void r300_release_referenced_objects(struct r300_context *r300) @@ -79,22 +78,18 @@ static void r300_release_referenced_objects(struct r300_context *r300) NULL); } - /* The dummy VBO. */ + /* Manually-created vertex buffers. */ pipe_resource_reference(&r300->dummy_vb, NULL); - - /* The SWTCL VBO. */ pipe_resource_reference(&r300->vbo, NULL); - /* Vertex buffers. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - } - /* If there are any queries pending or not destroyed, remove them now. */ foreach_s(query, temp, &r300->query_list) { remove_from_list(query); FREE(query); } + + r300->context.delete_depth_stencil_alpha_state(&r300->context, + r300->dsa_decompress_zmask); } static void r300_destroy_context(struct pipe_context* context) @@ -106,20 +101,12 @@ static void r300_destroy_context(struct pipe_context* context) if (r300->draw) draw_destroy(r300->draw); - if (r300->upload_vb) - u_upload_destroy(r300->upload_vb); - if (r300->upload_ib) - u_upload_destroy(r300->upload_ib); - - if (r300->tran.translate_cache) - translate_cache_destroy(r300->tran.translate_cache); + if (r300->vbuf_mgr) + u_vbuf_mgr_destroy(r300->vbuf_mgr); /* XXX: This function assumes r300->query_list was initialized */ r300_release_referenced_objects(r300); - if (r300->zmask_mm) - r300_hyperz_destroy_mm(r300); - if (r300->cs) r300->rws->cs_destroy(r300->cs); @@ -152,11 +139,11 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300); } -void r300_flush_cb(void *data) +static void r300_flush_callback(void *data, unsigned flags) { struct r300_context* const cs_context_copy = data; - cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); + r300_flush(&cs_context_copy->context, flags, NULL); } #define R300_INIT_ATOM(atomname, atomsize) \ @@ -180,7 +167,6 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; @@ -206,18 +192,18 @@ static boolean r300_setup_atoms(struct 
r300_context* r300) /* ZB (unpipelined), SC. */ R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ - R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); + R300_INIT_ATOM(dsa_state, is_r500 ? (drm_2_6_0 ? 10 : 8) : 6); /* RB3D. */ R300_INIT_ATOM(blend_state, 8); R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); /* SC. */ R300_INIT_ATOM(scissor_state, 3); /* GB, FG, GA, SU, SC, RB3D. */ - R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0)); + R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0)); /* VAP. */ R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(pvs_flush, 2); - R300_INIT_ATOM(vap_invariant_state, 9); + R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9); R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -226,7 +212,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(rs_block_state, 0); R300_INIT_ATOM(rs_state, 0); /* SC, US. */ - R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0)); + R300_INIT_ATOM(fb_state_pipelined, 8); /* US. */ R300_INIT_ATOM(fs, 0); R300_INIT_ATOM(fs_rc_constant_state, 0); @@ -237,9 +223,9 @@ static boolean r300_setup_atoms(struct r300_context* r300) if (can_hyperz) { /* HiZ Clear */ if (has_hiz_ram) - R300_INIT_ATOM(hiz_clear, 0); + R300_INIT_ATOM(hiz_clear, 4); /* zmask clear */ - R300_INIT_ATOM(zmask_clear, 0); + R300_INIT_ATOM(zmask_clear, 4); } /* ZB (unpipelined), SU. */ R300_INIT_ATOM(query_start, 4); @@ -341,7 +327,7 @@ static void r300_init_states(struct pipe_context *pipe) /* Initialize the VAP invariant state. */ { - BEGIN_CB(vap_invariant->cb, 9); + BEGIN_CB(vap_invariant->cb, r300->vap_invariant_state.size); OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); OUT_CB_32F(1.0); @@ -349,6 +335,10 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_32F(1.0); OUT_CB_32F(1.0); OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0); + } END_CB; } @@ -357,17 +347,22 @@ static void r300_init_states(struct pipe_context *pipe) BEGIN_CB(invariant->cb, r300->invariant_state.size); OUT_CB_REG(R300_GB_SELECT, 0); OUT_CB_REG(R300_FG_FOG_BLEND, 0); - OUT_CB_REG(R300_GA_ROUND_MODE, 1); OUT_CB_REG(R300_GA_OFFSET, 0); OUT_CB_REG(R300_SU_TEX_WRAP, 0); OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); + OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff); if (r300->screen->caps.is_rv350) { OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_GA_COLOR_CONTROL_PS3, 0); + OUT_CB_REG(R500_SU_TEX_WRAP_PS3, 0); + } END_CB; } @@ -443,39 +438,23 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_state_functions(r300); r300_init_resource_functions(r300); + r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16, + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER, + U_VERTEX_FETCH_DWORD_ALIGNED); + if (!r300->vbuf_mgr) + goto fail; + r300->blitter = util_blitter_create(&r300->context); if (r300->blitter == NULL) goto fail; /* Render functions must be initialized after blitter. 
*/ r300_init_render_functions(r300); - - rws->cs_set_flush(r300->cs, r300_flush_cb, r300); - - /* setup hyper-z mm */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - if (!r300_hyperz_init_mm(r300)) - goto fail; - - r300->upload_ib = u_upload_create(&r300->context, - 32 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - - if (r300->upload_ib == NULL) - goto fail; - - r300->upload_vb = u_upload_create(&r300->context, - 128 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (r300->upload_vb == NULL) - goto fail; - - r300->tran.translate_cache = translate_cache_create(); - if (r300->tran.translate_cache == NULL) - goto fail; - r300_init_states(&r300->context); + rws->cs_set_flush(r300->cs, r300_flush_callback, r300); + /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this * dummy texture there. */ @@ -502,7 +481,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, } { - struct pipe_resource vb = {}; + struct pipe_resource vb; + memset(&vb, 0, sizeof(vb)); vb.target = PIPE_BUFFER; vb.format = PIPE_FORMAT_R8_UNORM; vb.bind = PIPE_BIND_VERTEX_BUFFER; @@ -514,36 +494,45 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->dummy_vb = screen->resource_create(screen, &vb); } + { + struct pipe_depth_stencil_alpha_state dsa; + memset(&dsa, 0, sizeof(dsa)); + dsa.depth.writemask = 1; + + r300->dsa_decompress_zmask = + r300->context.create_depth_stencil_alpha_state(&r300->context, + &dsa); + } + + /* Print driver info. */ +#ifdef DEBUG + { +#else + if (DBG_ON(r300, DBG_INFO)) { +#endif + fprintf(stderr, + "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" + "r300: GART size: %d MB, VRAM size: %d MB\n" + "r300: AA compression: %s, Z compression: %s, HiZ: %s\n", + rws->get_value(rws, R300_VID_DRM_MAJOR), + rws->get_value(rws, R300_VID_DRM_MINOR), + rws->get_value(rws, R300_VID_DRM_PATCHLEVEL), + screen->get_name(screen), + rws->get_value(rws, R300_VID_PCI_ID), + rws->get_value(rws, R300_VID_GB_PIPES), + rws->get_value(rws, R300_VID_Z_PIPES), + rws->get_value(rws, R300_VID_GART_SIZE) >> 20, + rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20, + rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) && + r300->screen->caps.zmask_ram ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) && + r300->screen->caps.hiz_ram ? "YES" : "NO"); + } + return &r300->context; - fail: +fail: r300_destroy_context(&r300->context); return NULL; } - -void r300_finish(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb; - unsigned i; - - /* This is a preliminary implementation of glFinish. - * - * The ideal implementation should use something like EmitIrqLocked and - * WaitIrq, or better, real fences. 
- */ - if (r300->fb_state.state) { - fb = r300->fb_state.state; - - for (i = 0; i < fb->nr_cbufs; i++) { - if (fb->cbufs[i]->texture) { - r300->rws->buffer_wait(r300->rws, - r300_texture(fb->cbufs[i]->texture)->buffer); - return; - } - } - if (fb->zsbuf && fb->zsbuf->texture) { - r300->rws->buffer_wait(r300->rws, - r300_texture(fb->zsbuf->texture)->buffer); - } - } -} diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 39dcde0610..e395f41290 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,11 +30,11 @@ #include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_transfer.h" - -#include "translate/translate_cache.h" +#include "util/u_vbuf_mgr.h" #include "r300_defines.h" #include "r300_screen.h" +#include "r300_winsys.h" struct u_upload_mgr; struct r300_context; @@ -65,11 +65,15 @@ struct r300_aa_state { }; struct r300_blend_state { - uint32_t cb[8]; + struct pipe_blend_state state; + + uint32_t cb_clamp[8]; + uint32_t cb_noclamp[8]; uint32_t cb_no_readwrite[8]; }; struct r300_blend_color_state { + struct pipe_blend_color state; uint32_t cb[3]; }; @@ -91,9 +95,24 @@ struct r300_dsa_state { uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */ uint32_t cb_reg; uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ + uint32_t cb_reg1; + uint32_t alpha_value; /* R500_FG_ALPHA_VALUE: 0x4be0 */ + + /* The same, but for FP16 alpha test. */ + uint32_t cb_begin_fp16; + uint32_t alpha_function_fp16; /* R300_FG_ALPHA_FUNC: 0x4bd4 */ + uint32_t cb_reg_seq_fp16; + uint32_t z_buffer_control_fp16; /* R300_ZB_CNTL: 0x4f00 */ + uint32_t z_stencil_control_fp16; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ + uint32_t stencil_ref_mask_fp16; /* R300_ZB_STENCILREFMASK: 0x4f08 */ + uint32_t cb_reg_fp16; + uint32_t stencil_ref_bf_fp16; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ + uint32_t cb_reg1_fp16; + uint32_t alpha_value_fp16; /* R500_FG_ALPHA_VALUE: 0x4be0 */ /* The second command buffer disables zbuffer reads and writes. */ - uint32_t cb_no_readwrite[8]; + uint32_t cb_zb_no_readwrite[10]; + uint32_t cb_fp16_zb_no_readwrite[10]; /* Whether a two-sided stencil is enabled. */ boolean two_sided; @@ -102,7 +121,6 @@ struct r300_dsa_state { }; struct r300_hyperz_state { - int current_func; /* -1 after a clear before first op */ int flush; /* This is actually a command buffer with named dwords. */ uint32_t cb_flush_begin; @@ -121,7 +139,7 @@ struct r300_gpu_flush { uint32_t cb_flush_clean[6]; }; -#define RS_STATE_MAIN_SIZE 23 +#define RS_STATE_MAIN_SIZE 25 struct r300_rs_state { /* Original rasterizer state. */ @@ -189,11 +207,6 @@ struct r300_sampler_view { uint32_t texcache_region; }; -struct r300_texture_fb_state { - uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */ - uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */ -}; - struct r300_texture_sampler_state { struct r300_texture_format_state format; uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ @@ -225,11 +238,11 @@ struct r300_vertex_stream_state { }; struct r300_invariant_state { - uint32_t cb[20]; + uint32_t cb[24]; }; struct r300_vap_invariant_state { - uint32_t cb[9]; + uint32_t cb[11]; }; struct r300_viewport_state { @@ -273,14 +286,12 @@ struct r300_query { /* How many results have been written, in dwords. It's incremented * after end_query and flush. 
*/ unsigned num_results; - /* if we've flushed the query */ - boolean flushed; /* if begin has been emitted */ boolean begin_emitted; /* The buffer where query results are stored. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; /* The size of the buffer. */ unsigned buffer_size; /* The domain of the buffer. */ @@ -291,33 +302,19 @@ struct r300_query { struct r300_query* next; }; -/* Fence object. - * - * This is a fake fence. Instead of syncing with the fence, we sync - * with the context, which is inefficient but compliant. - * - * This is not a subclass of pipe_fence_handle because pipe_fence_handle is - * never actually fully defined. So, rather than have it as a member, and do - * subclass-style casting, we treat pipe_fence_handle as an opaque, and just - * trust that our state tracker does not ever mess up fence objects. - */ -struct r300_fence { - struct pipe_reference reference; - struct r300_context *ctx; - boolean signalled; -}; - struct r300_surface { struct pipe_surface base; /* Winsys buffer backing the texture. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; enum r300_buffer_domain domain; uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ + uint32_t pitch_zmask; /* ZMASK_PITCH */ + uint32_t pitch_hiz; /* HIZ_PITCH */ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ /* Parameters dedicated to the CBZB clear. */ @@ -329,13 +326,9 @@ struct r300_surface { /* Whether the CBZB clear is allowed on the surface. */ boolean cbzb_allowed; - }; struct r300_texture_desc { - /* Parent class. */ - struct u_resource b; - /* Width, height, and depth. * Most of the time, these are equal to pipe_texture::width0, height0, * and depth0. However, NPOT 3D textures must have dimensions aligned @@ -387,28 +380,39 @@ struct r300_texture_desc { /* Whether CBZB fast color clear is allowed on the miplevel. */ boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; + + /* Zbuffer compression info for each miplevel. */ + boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS]; + /* If zero, then disable Z compression/HiZ. */ + unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS]; + /* Zmask/HiZ strides for each miplevel. */ + unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; }; -struct r300_texture { - struct r300_texture_desc desc; +struct r300_resource +{ + struct u_vbuf_resource b; + /* Winsys buffer backing this resource. */ + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; enum r300_buffer_domain domain; + unsigned buf_size; + + /* Constant buffers are in user memory. */ + uint8_t *constant_buffer; - /* Pipe buffer backing this texture. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + /* Texture description (addressing, layout, special features). */ + struct r300_texture_desc tex; /* Registers carrying texture format data. */ /* Only format-independent bits should be filled in. */ struct r300_texture_format_state tx_format; - /* All bits should be filled in. 
*/ - struct r300_texture_fb_state fb_state; - /* hyper-z memory allocs */ - struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; - struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; - boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; - boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; + /* Where the texture starts in the buffer. */ + unsigned tex_offset; /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ @@ -418,33 +422,29 @@ struct r300_texture { struct r300_vertex_element_state { unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; + unsigned format_size[PIPE_MAX_ATTRIBS]; - /* If (velem[i].src_format != hw_format[i]), the vertex buffer - * referenced by this vertex element cannot be used for rendering and - * its vertex data must be translated to hw_format[i]. */ - enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; - unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + struct u_vbuf_mgr_elements *vmgr_elements; /* The size of the vertex, in dwords. */ unsigned vertex_size_dwords; - /* This might mean two things: - * - src_format != hw_format, as discussed above. - * - src_offset % 4 != 0. */ - boolean incompatible_layout; - struct r300_vertex_stream_state vertex_stream; }; -struct r300_translate_context { - /* Translate cache for incompatible vertex offset/stride/format fallback. */ - struct translate_cache *translate_cache; +enum r300_hiz_func { + HIZ_FUNC_NONE, + + /* The function, when determined, is set in stone + * until the next HiZ clear. */ - /* The vertex buffer slot containing the translated buffer. */ - unsigned vb_slot; + /* MAX is written to the HiZ buffer. + * Used for LESS, LEQUAL. */ + HIZ_FUNC_MAX, - /* Saved and new vertex element state. */ - void *saved_velems, *new_velems; + /* MIN is written to the HiZ buffer. + * Used for GREATER, GEQUAL. */ + HIZ_FUNC_MIN, }; struct r300_context { @@ -473,8 +473,6 @@ struct r300_context { struct blitter_context* blitter; /* Stencil two-sided reference value fallback. */ struct r300_stencilref_context *stencilref_fallback; - /* For translating vertex buffers having incompatible vertex layout. */ - struct r300_translate_context tran; /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this @@ -556,13 +554,8 @@ struct r300_context { /* The pointers to the first and the last atom. */ struct r300_atom *first_dirty, *last_dirty; - /* Vertex buffers for Gallium. */ - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - int vertex_buffer_count; - int vertex_buffer_max_index; /* Vertex elements for Gallium. */ struct r300_vertex_element_state *velems; - bool any_user_vbs; struct pipe_index_buffer index_buffer; @@ -587,21 +580,26 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ boolean two_sided_color; - /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ - boolean incompatible_vb_layout; -#define R300_Z_COMPRESS_44 1 -#define RV350_Z_COMPRESS_88 2 - int z_compression; + /* Whether fast color clear is enabled. */ boolean cbzb_clear; - boolean z_decomp_rd; - - /* two mem block managers for hiz/zmask ram space */ - struct mem_block *hiz_mm; - struct mem_block *zmask_mm; - - /* upload managers */ - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; + /* Whether ZMASK is enabled. */ + boolean zmask_in_use; + /* Whether ZMASK is being decompressed. 
*/ + boolean zmask_decompress; + /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */ + boolean hyperz_locked; + /* The zbuffer the ZMASK of which is locked. */ + struct pipe_surface *locked_zbuffer; + /* Whether HIZ is enabled. */ + boolean hiz_in_use; + /* HiZ function. Can be either MIN or MAX. */ + enum r300_hiz_func hiz_func; + /* HiZ clear value. */ + uint32_t hiz_clear_value; + + void *dsa_decompress_zmask; + + struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; @@ -611,13 +609,12 @@ struct r300_context { /* const tracking for VS */ int vs_const_base; - /* AOS (PACKET3_3D_LOAD_VBPNTR) command buffer for the case offset=0. */ - uint32_t aos_cb[(16 * 3 + 1) / 2]; - boolean aos_dirty; - - /* Whether any buffer (FB, textures, VBOs) has been set, but buffers - * haven't been validated yet. */ - boolean validate_buffers; + /* Vertex array state info */ + boolean vertex_arrays_dirty; + boolean vertex_arrays_indexed; + int vertex_arrays_offset; + int vertex_arrays_instance_id; + boolean instancing_enabled; }; #define foreach_atom(r300, atom) \ @@ -637,9 +634,9 @@ static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf) return (struct r300_surface*)surf; } -static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex) +static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex) { - return (struct r300_texture*)tex; + return (struct r300_resource*)tex; } static INLINE struct r300_context* r300_context(struct pipe_context* context) @@ -652,12 +649,25 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300) return (struct r300_fragment_shader*)r300->fs.state; } +static INLINE void r300_mark_atom_dirty(struct r300_context *r300, + struct r300_atom *atom) +{ + atom->dirty = TRUE; + + if (!r300->first_dirty) { + r300->first_dirty = atom; + r300->last_dirty = atom+1; + } else { + if (atom < r300->first_dirty) + r300->first_dirty = atom; + else if (atom+1 > r300->last_dirty) + r300->last_dirty = atom+1; + } +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); -void r300_finish(struct r300_context *r300); -void r300_flush_cb(void *data); - /* Context initialization. 
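The first_dirty/last_dirty pointers maintained by r300_mark_atom_dirty above describe a half-open range of atoms that a consumer would typically walk as sketched below; this loop is a hypothetical illustration of that pattern, not the driver's actual emit code.

/* Hypothetical sketch of consuming the dirty range (not from this patch). */
static void emit_dirty_atoms_sketch(struct r300_context *r300)
{
    struct r300_atom *atom;

    for (atom = r300->first_dirty; atom != r300->last_dirty; atom++) {
        if (atom->dirty) {
            atom->emit(r300, atom->size, atom->state);
            atom->dirty = FALSE;
        }
    }
    r300->first_dirty = NULL;
    r300->last_dirty = NULL;
}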
*/ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_blit_functions(struct r300_context *r300); @@ -668,10 +678,17 @@ void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); /* r300_blit.c */ -void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned level, - unsigned layer); +void r300_decompress_zmask(struct r300_context *r300); +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); +void r300_decompress_zmask_locked(struct r300_context *r300); + +/* r300_flush.c */ +void r300_flush(struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence); + +/* r300_hyperz.c */ +void r300_update_hyperz_state(struct r300_context* r300); /* r300_query.c */ void r300_resume_query(struct r300_context *r300, @@ -679,8 +696,6 @@ void r300_resume_query(struct r300_context *r300, void r300_stop_query(struct r300_context *r300); /* r300_render_translate.c */ -void r300_begin_vertex_translate(struct r300_context *r300); -void r300_end_vertex_translate(struct r300_context *r300); void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, @@ -696,29 +711,16 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG, - R300_CHANGED_ZCLEAR_FLAG + R300_CHANGED_HYPERZ_FLAG, + R300_CHANGED_MULTIWRITE }; void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change change); void r300_mark_fs_code_dirty(struct r300_context *r300); -static INLINE void r300_mark_atom_dirty(struct r300_context *r300, - struct r300_atom *atom) -{ - atom->dirty = TRUE; - - if (!r300->first_dirty) { - r300->first_dirty = atom; - r300->last_dirty = atom+1; - } else { - if (atom < r300->first_dirty) - r300->first_dirty = atom; - if (atom+1 > r300->last_dirty) - r300->last_dirty = atom+1; - } -} +/* r300_state_derived.c */ +void r300_update_derived_state(struct r300_context* r300); /* r300_debug.c */ void r500_dump_rs_block(struct r300_rs_block *rs); diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 67fb0096a8..2e52dfa43c 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -35,12 +35,6 @@ * that they neatly hide away, and don't have the cost of function setup,so * we're going to use them. */ -#ifdef DEBUG -#define CS_DEBUG(x) x -#else -#define CS_DEBUG(x) -#endif - /** * Command submission setup. */ @@ -50,22 +44,29 @@ struct r300_winsys_screen *cs_winsys = (context)->rws; \ int cs_count = 0; (void) cs_count; (void) cs_winsys; +#ifdef DEBUG + #define BEGIN_CS(size) do { \ assert(size <= (R300_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \ - CS_DEBUG(cs_count = size;) \ + cs_count = size; \ } while (0) -#ifdef DEBUG #define END_CS do { \ if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ cs_count, __FUNCTION__, __FILE__, __LINE__); \ cs_count = 0; \ } while (0) + +#define CS_USED_DW(x) cs_count -= (x) + #else + +#define BEGIN_CS(size) #define END_CS -#endif +#define CS_USED_DW(x) +#endif /** * Writing pure DWORDs. 
@@ -73,7 +74,7 @@ #define OUT_CS(value) do { \ cs_copy->buf[cs_copy->cdw++] = (value); \ - CS_DEBUG(cs_count--;) \ + CS_USED_DW(1); \ } while (0) #define OUT_CS_32F(value) \ @@ -98,7 +99,7 @@ #define OUT_CS_TABLE(values, count) do { \ memcpy(cs_copy->buf + cs_copy->cdw, values, count * 4); \ cs_copy->cdw += count; \ - CS_DEBUG(cs_count -= count;) \ + CS_USED_DW(count); \ } while (0) @@ -106,27 +107,11 @@ * Writing relocations. */ -#define OUT_CS_RELOC(bo, offset, rd, wd) do { \ - assert(bo); \ - OUT_CS(offset); \ - cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \ - CS_DEBUG(cs_count -= 2;) \ -} while (0) - -#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \ - assert(bo); \ - OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset, rd, wd); \ -} while (0) - -#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \ - assert(tex); \ - OUT_CS_RELOC(tex->cs_buffer, offset, rd, wd); \ -} while (0) - -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \ - assert(bo); \ - cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf, rd, wd); \ - CS_DEBUG(cs_count -= 2;) \ +#define OUT_CS_RELOC(r) do { \ + assert((r)); \ + assert((r)->cs_buf); \ + cs_winsys->cs_write_reloc(cs_copy, (r)->cs_buf); \ + CS_USED_DW(2); \ } while (0) @@ -135,7 +120,7 @@ */ #define WRITE_CS_TABLE(values, count) do { \ - CS_DEBUG(assert(cs_count == 0);) \ + assert(cs_count == 0); \ memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \ cs_copy->cdw += (count); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 52031dd97b..b60cfd1f24 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -27,6 +27,7 @@ #include <stdio.h> static const struct debug_named_value debug_options[] = { + { "info", DBG_INFO, "Print hardware info"}, { "fp", DBG_FP, "Log fragment program compilation" }, { "vp", DBG_VP, "Log vertex program compilation" }, { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" }, @@ -41,6 +42,7 @@ static const struct debug_named_value debug_options[] = { { "fb", DBG_FB, "Log framebuffer" }, { "cbzb", DBG_CBZB, "Log fast color clear info" }, { "hyperz", DBG_HYPERZ, "Log HyperZ info" }, + { "upload", DBG_UPLOAD, "Log user buffer upload info" }, { "scissor", DBG_SCISSOR, "Log scissor info" }, { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" }, { "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" }, @@ -48,6 +50,8 @@ static const struct debug_named_value debug_options[] = { { "noimmd", DBG_NO_IMMD, "Disable immediate mode" }, { "noopt", DBG_NO_OPT, "Disable shader optimizations" }, { "nocbzb", DBG_NO_CBZB, "Disable fast color clear" }, + { "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" }, + { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 04a5bd92d1..e17a907e77 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -26,7 +26,6 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_mm.h" -#include "util/u_simple_list.h" #include "r300_context.h" #include "r300_cb.h" @@ -46,7 +45,10 @@ void r300_emit_blend_state(struct r300_context* r300, CS_LOCALS(r300); if (fb->nr_cbufs) { - WRITE_CS_TABLE(blend->cb, size); + if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) + WRITE_CS_TABLE(blend->cb_noclamp, size); + else + WRITE_CS_TABLE(blend->cb_clamp, size); } else { 
WRITE_CS_TABLE(blend->cb_no_readwrite, size); } @@ -78,9 +80,15 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); if (fb->zsbuf) { - WRITE_CS_TABLE(&dsa->cb_begin, size); + if (fb->nr_cbufs && fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) + WRITE_CS_TABLE(&dsa->cb_begin_fp16, size); + else + WRITE_CS_TABLE(&dsa->cb_begin, size); } else { - WRITE_CS_TABLE(dsa->cb_no_readwrite, size); + if (fb->nr_cbufs && fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) + WRITE_CS_TABLE(dsa->cb_fp16_zb_no_readwrite, size); + else + WRITE_CS_TABLE(dsa->cb_zb_no_readwrite, size); } } @@ -90,7 +98,7 @@ static void get_rc_constant_state( struct rc_constant * constant) { struct r300_textures_state* texstate = r300->textures_state.state; - struct r300_texture *tex; + struct r300_resource *tex; assert(constant->Type == RC_CONSTANT_STATE); @@ -102,19 +110,19 @@ static void get_rc_constant_state( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: - tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); - vec[0] = 1.0 / tex->desc.width0; - vec[1] = 1.0 / tex->desc.height0; + tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); + vec[0] = 1.0 / tex->tex.width0; + vec[1] = 1.0 / tex->tex.height0; vec[2] = 0; vec[3] = 1; break; case RC_STATE_R300_TEXSCALE_FACTOR: - tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); /* Add a small number to the texture size to work around rounding errors in hw. */ - vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f); - vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f); - vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f); + vec[0] = tex->b.b.b.width0 / (tex->tex.width0 + 0.001f); + vec[1] = tex->b.b.b.height0 / (tex->tex.height0 + 0.001f); + vec[2] = tex->b.b.b.depth0 / (tex->tex.depth0 + 0.001f); vec[3] = 1; break; @@ -353,11 +361,9 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); if (aa->dest) { - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset, 0, aa->dest->domain); - - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch, 0, aa->dest->domain); + OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset); + OUT_CS_RELOC(aa->dest); + OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, aa->dest->pitch); } OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); @@ -370,6 +376,8 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct r300_surface* surf; unsigned i; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + uint32_t rb3d_cctl = 0; + CS_LOCALS(r300); BEGIN_CS(size); @@ -377,21 +385,24 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not * what we usually want. 
*/ if (r300->screen->caps.is_r500) { - OUT_CS_REG(R300_RB3D_CCTL, - R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE); - } else { - OUT_CS_REG(R300_RB3D_CCTL, 0); + rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE; } + if (fb->nr_cbufs && + r300_fragment_shader_writes_all(r300_fs(r300))) { + rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs); + } + + OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl); /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { surf = r300_surface(fb->cbufs[i]); - OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain); + OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain); + OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch); + OUT_CS_RELOC(surf); } /* Set up the ZB part of the CBZB clear. */ @@ -400,11 +411,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset, 0, surf->domain); + OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch, 0, surf->domain); + OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch); + OUT_CS_RELOC(surf); DBG(r300, DBG_CBZB, "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, @@ -416,37 +427,19 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->format); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain); + OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain); + OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch); + OUT_CS_RELOC(surf); if (can_hyperz) { - uint32_t surf_pitch; - struct r300_texture *tex; - int level = surf->base.u.tex.level; - tex = r300_texture(surf->base.texture); - - surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; /* HiZ RAM. */ - if (r300->screen->caps.hiz_ram) { - if (tex->hiz_mem[level]) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); - } - } + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); /* Z Mask RAM. 
(compressed zbuffer) */ - if (tex->zmask_mem[level]) { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); - } + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask); } } @@ -458,6 +451,7 @@ void r300_emit_hyperz_state(struct r300_context *r300, { struct r300_hyperz_state *z = state; CS_LOCALS(r300); + if (z->flush) WRITE_CS_TABLE(&z->cb_flush_begin, size); else @@ -483,15 +477,22 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - unsigned i; + unsigned i, num_cbufs = fb->nr_cbufs; + unsigned mspos0, mspos1; CS_LOCALS(r300); + /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be + * marked as UNUSED in the US block. */ + if (r300_fragment_shader_writes_all(r300_fs(r300))) { + num_cbufs = MIN2(num_cbufs, 1); + } + BEGIN_CS(size); /* Colorbuffer format in the US block. * (must be written after unpipelined regs) */ OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); - for (i = 0; i < fb->nr_cbufs; i++) { + for (i = 0; i < num_cbufs; i++) { OUT_CS(r300_surface(fb->cbufs[i])->format); } for (; i < 4; i++) { @@ -501,38 +502,36 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, /* Multisampling. Depends on framebuffer sample count. * These are pipelined regs and as such cannot be moved * to the AA state. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - unsigned mspos0 = 0x66666666; - unsigned mspos1 = 0x6666666; - - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - /* Subsample placement. These may not be optimal. */ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - } - } + mspos0 = 0x66666666; + mspos1 = 0x6666666; - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(mspos0); - OUT_CS(mspos1); + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. 
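The GB_MSPOS0/GB_MSPOS1 subsample placements used here could equally be kept in a small table instead of a switch; a sketch under that assumption, with the values copied from the code above (the struct and helper names are illustrative):

struct r300_mspos {
    unsigned nr_samples;
    uint32_t mspos0, mspos1;
};

static const struct r300_mspos mspos_table[] = {
    { 2, 0x33996633, 0x6666663 },
    { 3, 0x33936933, 0x6666663 },
    { 4, 0x33939933, 0x3966663 },
    { 6, 0x22a2aa22, 0x2a65672 },
};

static void get_mspos(unsigned nr_samples, uint32_t *mspos0, uint32_t *mspos1)
{
    unsigned i;

    /* Defaults for the single-sample case. */
    *mspos0 = 0x66666666;
    *mspos1 = 0x6666666;

    for (i = 0; i < sizeof(mspos_table) / sizeof(mspos_table[0]); i++) {
        if (mspos_table[i].nr_samples == nr_samples) {
            *mspos0 = mspos_table[i].mspos0;
            *mspos1 = mspos_table[i].mspos1;
            return;
        }
    }
}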
*/ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + } } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); END_CS; } @@ -553,14 +552,12 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state) OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; query->begin_emitted = TRUE; - query->flushed = FALSE; } static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); assert(caps->num_frag_pipes); @@ -578,29 +575,25 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, case 4: /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 3) * 4, - 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 3) * 4); + OUT_CS_RELOC(r300->query_current); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 2) * 4, - 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 2) * 4); + OUT_CS_RELOC(r300->query_current); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, - 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); + OUT_CS_RELOC(r300->query_current); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, - 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); + OUT_CS_RELOC(r300->query_current); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -616,13 +609,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, static void rv530_emit_query_end_single_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, query->num_results * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -630,16 +622,15 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, static void rv530_emit_query_end_double_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, 
RV530_FG_ZBREG_DEST_PIPE_SELECT_1); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -778,7 +769,7 @@ void r300_emit_textures_state(struct r300_context *r300, { struct r300_textures_state *allstate = (struct r300_textures_state*)state; struct r300_texture_sampler_state *texstate; - struct r300_texture *tex; + struct r300_resource *tex; unsigned i; CS_LOCALS(r300); @@ -788,7 +779,7 @@ void r300_emit_textures_state(struct r300_context *r300, for (i = 0; i < allstate->count; i++) { if ((1 << i) & allstate->tx_enable) { texstate = &allstate->regs[i]; - tex = r300_texture(allstate->sampler_views[i]->base.texture); + tex = r300_resource(allstate->sampler_views[i]->base.texture); OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0); OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1); @@ -799,73 +790,35 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2); - OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain, - 0); + OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config); + OUT_CS_RELOC(tex); } } END_CS; } -static void r300_update_aos_cb(struct r300_context *r300, unsigned packet_size) +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, + boolean indexed, int instance_id) { - struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; + struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer; + struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; - unsigned *hw_format_size = r300->velems->hw_format_size; - unsigned size1, size2, aos_count = r300->velems->count; + struct r300_resource *buf; int i; - CB_LOCALS; - - BEGIN_CB(r300->aos_cb, packet_size); - for (i = 0; i < aos_count - 1; i += 2) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - vb2 = &vbuf[velem[i+1].vertex_buffer_index]; - size1 = hw_format_size[i]; - size2 = hw_format_size[i+1]; - - OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | - R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CB(vb1->buffer_offset + velem[i].src_offset); - OUT_CB(vb2->buffer_offset + velem[i+1].src_offset); - } - - if (aos_count & 1) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - size1 = hw_format_size[i]; - - OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CB(vb1->buffer_offset + velem[i].src_offset); - } - END_CB; - - r300->aos_dirty = FALSE; -} - -void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->velems->velem; - struct r300_buffer *buf; - int i; - unsigned aos_count = r300->velems->count; - unsigned packet_size = (aos_count * 3 + 1) / 2; + unsigned vertex_array_count = r300->velems->count; + unsigned packet_size = (vertex_array_count * 3 + 1) / 2; + struct pipe_vertex_buffer *vb1, *vb2; + unsigned *hw_format_size = r300->velems->format_size; + unsigned size1, size2, offset1, offset2, stride1, stride2; CS_LOCALS(r300); - BEGIN_CS(2 + packet_size + aos_count * 2); + BEGIN_CS(2 + packet_size + 
vertex_array_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); - - if (!offset) { - if (r300->aos_dirty) { - r300_update_aos_cb(r300, packet_size); - } - OUT_CS_TABLE(r300->aos_cb, packet_size); - } else { - struct pipe_vertex_buffer *vb1, *vb2; - unsigned *hw_format_size = r300->velems->hw_format_size; - unsigned size1, size2; + OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); - for (i = 0; i < aos_count - 1; i += 2) { + if (instance_id == -1) { + /* Non-instanced arrays. This ignores instance_divisor and instance_id. */ + for (i = 0; i < vertex_array_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; vb2 = &vbuf[velem[i+1].vertex_buffer_index]; size1 = hw_format_size[i]; @@ -877,23 +830,75 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); } - if (aos_count & 1) { + if (vertex_array_count & 1) { vb1 = &vbuf[velem[i].vertex_buffer_index]; size1 = hw_format_size[i]; OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); } - } - for (i = 0; i < aos_count; i++) { - buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0); + for (i = 0; i < vertex_array_count; i++) { + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_RELOC(buf); + } + } else { + /* Instanced arrays. */ + for (i = 0; i < vertex_array_count - 1; i += 2) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; + + if (velem[i].instance_divisor) { + stride1 = 0; + offset1 = vb1->buffer_offset + velem[i].src_offset + + (instance_id / velem[i].instance_divisor) * vb1->stride; + } else { + stride1 = vb1->stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; + } + if (velem[i+1].instance_divisor) { + stride2 = 0; + offset2 = vb2->buffer_offset + velem[i+1].src_offset + + (instance_id / velem[i+1].instance_divisor) * vb2->stride; + } else { + stride2 = vb2->stride; + offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride; + } + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2)); + OUT_CS(offset1); + OUT_CS(offset2); + } + + if (vertex_array_count & 1) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = hw_format_size[i]; + + if (velem[i].instance_divisor) { + stride1 = 0; + offset1 = vb1->buffer_offset + velem[i].src_offset + + (instance_id / velem[i].instance_divisor) * vb1->stride; + } else { + stride1 = vb1->stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; + } + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1)); + OUT_CS(offset1); + } + + for (i = 0; i < vertex_array_count; i++) { + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_RELOC(buf); + } } END_CS; } -void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) +void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) { CS_LOCALS(r300); @@ -913,7 +918,8 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0, 
r300_buffer(r300->vbo)->domain, 0); + OUT_CS(0); + OUT_CS_RELOC(r300_resource(r300->vbo)); END_CS; } @@ -1080,109 +1086,47 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } -static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - -static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_hyperz_state *z = - (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0, height, offset_shift; - struct r300_texture* tex; - int i; - - tex = r300_texture(fb->zsbuf->texture); - - offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs; - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; - - /* convert from pixels to 4x4 blocks */ - stride = ALIGN_DIVUP(stride, 4); - - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); - /* there are 4 blocks per dwords */ - stride = ALIGN_DIVUP(stride, 4); - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); + struct r300_resource* tex; + CS_LOCALS(r300); - offset_shift = 2; - offset_shift += (r300screen->caps.num_frag_pipes / 2); + tex = r300_resource(fb->zsbuf->texture); - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); - } - z->current_func = -1; + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(0); + OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(r300->hiz_clear_value); + END_CS; /* Mark the current zbuffer's hiz ram as in use. 
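The hiz_dwords value written above presumably packs the same arithmetic that the removed per-line clear used to do by hand; a sketch of that computation, reusing the same round-up division it relied on (helper name is illustrative):

#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))

static unsigned hiz_dwords_sketch(unsigned stride_in_pixels, unsigned height,
                                  unsigned num_frag_pipes)
{
    /* One HiZ entry per 4x4 pixel block, split across the fragment pipes,
     * with 4 blocks per dword, the same steps as the removed code above. */
    unsigned stride_blocks = ALIGN_DIVUP(stride_in_pixels, 4);
    unsigned stride_per_pipe = ALIGN_DIVUP(stride_blocks, num_frag_pipes);
    unsigned stride_dwords = ALIGN_DIVUP(stride_per_pipe, 4);
    unsigned height_blocks = ALIGN_DIVUP(height, 4);

    return stride_dwords * height_blocks;
}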
*/ - tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->hiz_in_use = TRUE; + r300->hiz_func = HIZ_FUNC_NONE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0; - struct r300_texture* tex; - uint32_t i, height; - int mult, offset_shift; - - tex = r300_texture(fb->zsbuf->texture); - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; - - offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs; - - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; - - height = ALIGN_DIVUP(fb->zsbuf->height, mult); - - offset_shift = 4; - offset_shift += (r300screen->caps.num_frag_pipes / 2); - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + struct r300_resource *tex; + CS_LOCALS(r300); - /* okay have width in pixels - divide by block width */ - stride = ALIGN_DIVUP(stride, mult); - /* have width in blocks - divide by number of fragment pipes screen width */ - /* 16 blocks per dword */ - stride = ALIGN_DIVUP(stride, 16); + tex = r300_resource(fb->zsbuf->texture); - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); - } + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(0); + OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(0); + END_CS; /* Mark the current zbuffer's zmask as in use. */ - tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->zmask_in_use = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_ztop_state(struct r300_context* r300, @@ -1209,68 +1153,77 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, boolean do_validate_vertex_buffers, struct pipe_resource *index_buffer) { - struct pipe_framebuffer_state* fb = + struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_textures_state *texstate = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture* tex; - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->velems->velem; - struct pipe_resource *pbuf; + struct r300_resource *tex; unsigned i; - - /* Clean out BOs. */ - r300->rws->cs_reset_buffers(r300->cs); - - /* Color buffers... */ - for (i = 0; i < fb->nr_cbufs; i++) { - tex = r300_texture(fb->cbufs[i]->texture); - assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0, - r300_surface(fb->cbufs[i])->domain); - } - /* ...depth buffer... */ - if (fb->zsbuf) { - tex = r300_texture(fb->zsbuf->texture); - assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0, - r300_surface(fb->zsbuf)->domain); - } - /* ...textures... */ - for (i = 0; i < texstate->count; i++) { - if (!(texstate->tx_enable & (1 << i))) { - continue; + boolean flushed = FALSE; + +validate: + if (r300->fb_state.dirty) { + /* Color buffers... */ + for (i = 0; i < fb->nr_cbufs; i++) { + tex = r300_resource(fb->cbufs[i]->texture); + assert(tex && tex->buf && "cbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, + r300_surface(fb->cbufs[i])->domain); + } + /* ...depth buffer... 
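Likewise, the tex.zmask_dwords count used by the clear above presumably stands in for the removed per-line arithmetic; a sketch of that computation (ALIGN_DIVUP as in the HiZ sketch, a boolean flag standing in for RV350_Z_COMPRESS_88):

static unsigned zmask_dwords_sketch(unsigned stride_in_pixels, unsigned height,
                                    unsigned num_frag_pipes,
                                    boolean compress_8x8)
{
    unsigned block = compress_8x8 ? 8 : 4;  /* 8x8 tiles on RV350+, else 4x4 */
    unsigned stride_per_pipe = ALIGN_DIVUP(stride_in_pixels, num_frag_pipes);
    unsigned stride_blocks = ALIGN_DIVUP(stride_per_pipe, block);
    /* 16 tiles (2 bits each) per ZMASK dword. */
    unsigned stride_dwords = ALIGN_DIVUP(stride_blocks, 16);
    unsigned height_blocks = ALIGN_DIVUP(height, block);

    return stride_dwords * height_blocks;
}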
*/ + if (fb->zsbuf) { + tex = r300_resource(fb->zsbuf->texture); + assert(tex && tex->buf && "zsbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, + r300_surface(fb->zsbuf)->domain); } + } + if (r300->textures_state.dirty) { + /* ...textures... */ + for (i = 0; i < texstate->count; i++) { + if (!(texstate->tx_enable & (1 << i))) { + continue; + } - tex = r300_texture(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, tex->domain, 0); + tex = r300_resource(texstate->sampler_views[i]->base.texture); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, tex->domain, 0); + } } /* ...occlusion query buffer... */ if (r300->query_current) - r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buffer, - 0, r300->query_current->domain); + r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf, + 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->cs_buf, - r300_buffer(r300->vbo)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf, + r300_resource(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ - if (do_validate_vertex_buffers) { - for (i = 0; i < r300->velems->count; i++) { - pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - if (!pbuf) + if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { + struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer; + struct pipe_resource **last = r300->vbuf_mgr->real_vertex_buffer + + r300->vbuf_mgr->nr_real_vertex_buffers; + for (; buf != last; buf++) { + if (!*buf) continue; - r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->cs_buf, - r300_buffer(pbuf)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf, + r300_resource(*buf)->domain, 0); } } /* ...and index buffer for HWTCL path. */ if (index_buffer) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->cs_buf, - r300_buffer(index_buffer)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, + r300_resource(index_buffer)->domain, 0); + /* Now do the validation. */ if (!r300->rws->cs_validate(r300->cs)) { - return FALSE; + /* Ooops, an infinite loop, give up. */ + if (flushed) + return FALSE; + + r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); + flushed = TRUE; + goto validate; } return TRUE; @@ -1300,7 +1253,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) /* Emitted in flush. 
*/ dwords += 26; /* emit_query_end */ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) dwords += 2; return dwords; diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 278dbcb4c7..6c1c9d2fb1 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,7 +31,8 @@ struct r300_vertex_program_code; uint32_t pack_float24(float f); -void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed); +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, + boolean indexed, int instance_id); void r300_emit_blend_state(struct r300_context* r300, unsigned size, void* state); @@ -86,7 +87,7 @@ void r300_emit_scissor_state(struct r300_context* r300, void r300_emit_textures_state(struct r300_context *r300, unsigned size, void *state); -void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed); +void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed); void r300_emit_vap_invariant_state(struct r300_context *r300, unsigned size, void *state); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 451fe525b4..b3d0d344ec 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -31,29 +31,38 @@ #include "r300_cs.h" #include "r300_emit.h" -static void r300_flush(struct pipe_context* pipe, - unsigned flags, - struct pipe_fence_handle** fence) + +void r300_flush(struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence) { struct r300_context *r300 = r300_context(pipe); - struct r300_query *query; struct r300_atom *atom; - struct r300_fence **rfence = (struct r300_fence**)fence; - - u_upload_flush(r300->upload_vb); - u_upload_flush(r300->upload_ib); + struct r300_winsys_bo **rfence = (struct r300_winsys_bo**)fence; if (r300->draw && !r300->draw_vbo_locked) r300_draw_flush_vbuf(r300); + if (rfence) { + /* Create a fence, which is a dummy BO. */ + *rfence = r300->rws->buffer_create(r300->rws, 1, 1, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + R300_DOMAIN_GTT); + /* Add the fence as a dummy relocation. */ + r300->rws->cs_add_reloc(r300->cs, + r300->rws->buffer_get_cs_handle(*rfence), + R300_DOMAIN_GTT, R300_DOMAIN_GTT); + } + if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) r500_emit_index_bias(r300, 0); r300->flush_counter++; - r300->rws->cs_flush(r300->cs); + r300->rws->cs_flush(r300->cs, flags); r300->dirty_hw = 0; /* New kitchen sink, baby. */ @@ -62,30 +71,35 @@ static void r300_flush(struct pipe_context* pipe, r300_mark_atom_dirty(r300, atom); } } + r300->vertex_arrays_dirty = TRUE; /* Unmark HWTCL state for SWTCL. */ if (!r300->screen->caps.has_tcl) { r300->vs_state.dirty = FALSE; r300->vs_constants.dirty = FALSE; } - - r300->validate_buffers = TRUE; - } - - /* reset flushed query */ - foreach(query, &r300->query_list) { - query->flushed = TRUE; + } else { + if (rfence) { + /* We have to create a fence object, but the command stream is empty + * and we cannot emit an empty CS. We must write some regs then. */ + CS_LOCALS(r300); + OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0); + r300->rws->cs_flush(r300->cs, flags); + } else { + /* Even if hw is not dirty, we should at least reset the CS in case + * the space checking failed for the first draw operation. 
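The fence created in r300_flush above is just a 1-byte dummy BO added as a relocation, so waiting on the fence reduces to waiting for that BO to go idle. A sketch of what a fence wait could look like on top of this; the screen type, the NULL cs argument and the helper name are assumptions, not the actual winsys interface:

static boolean fence_wait_sketch(struct r300_winsys_screen *rws,
                                 struct r300_winsys_bo *fence_bo)
{
    /* A synchronized read mapping blocks until every CS that references the
     * BO has been processed, which is exactly the fence semantics wanted. */
    void *ptr = rws->buffer_map(fence_bo, NULL, PIPE_TRANSFER_READ);

    if (!ptr)
        return FALSE;

    rws->buffer_unmap(fence_bo);
    return TRUE;
}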
*/ + r300->rws->cs_flush(r300->cs, flags); + } } +} - /* Create a new fence. */ - if (rfence) { - *rfence = CALLOC_STRUCT(r300_fence); - pipe_reference_init(&(*rfence)->reference, 1); - (*rfence)->ctx = r300; - } +static void r300_flush_wrapped(struct pipe_context *pipe, + struct pipe_fence_handle **fence) +{ + r300_flush(pipe, 0, fence); } void r300_init_flush_functions(struct r300_context* r300) { - r300->context.flush = r300_flush; + r300->context.flush = r300_flush_wrapped; } diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 2936c3486e..4c502fefb3 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -149,16 +149,18 @@ static void get_external_state( unsigned i; unsigned char *swizzle; + state->frag_clamp = 0; + for (i = 0; i < texstate->sampler_state_count; i++) { struct r300_sampler_state *s = texstate->sampler_states[i]; struct r300_sampler_view *v = texstate->sampler_views[i]; - struct r300_texture *t; + struct r300_resource *t; if (!s || !v) { continue; } - t = r300_texture(texstate->sampler_views[i]->base.texture); + t = r300_resource(texstate->sampler_views[i]->base.texture); if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { state->unit[i].compare_mode_enabled = 1; @@ -181,7 +183,7 @@ static void get_external_state( state->unit[i].non_normalized_coords = !s->state.normalized_coords; /* XXX this should probably take into account STR, not just S. */ - if (t->desc.is_npot) { + if (t->tex.is_npot) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; @@ -201,7 +203,7 @@ static void get_external_state( state->unit[i].wrap_mode = RC_WRAP_NONE; } - if (t->desc.b.b.target == PIPE_TEXTURE_3D) + if (t->b.b.b.target == PIPE_TEXTURE_3D) state->unit[i].clamp_and_scale_before_fetch = TRUE; } } @@ -298,44 +300,98 @@ static void r300_emit_fs_code_to_buffer( } } else { /* r300 */ struct r300_fragment_program_code *code = &generic_code->code.r300; - - shader->cb_code_size = 19 + - (r300->screen->caps.is_r400 ? 2 : 0) + - code->alu.length * 4 + - (code->tex.length ? (1 + code->tex.length) : 0) + - imm_count * 5; + unsigned int alu_length = code->alu.length; + unsigned int alu_iterations = ((alu_length - 1) / 64) + 1; + unsigned int tex_length = code->tex.length; + unsigned int tex_iterations = + tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0; + unsigned int iterations = + alu_iterations > tex_iterations ? alu_iterations : tex_iterations; + unsigned int bank = 0; + + shader->cb_code_size = 15 + + /* R400_US_CODE_BANK */ + (r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) + + /* R400_US_CODE_EXT */ + (r300->screen->caps.is_r400 ? 2 : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */ + (code->r390_mode ? (5 * alu_iterations) : 4) + + /* R400_US_ALU_EXT_ADDR_[0-63] */ + (code->r390_mode ? (code->alu.length) : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */ + code->alu.length * 4 + + /* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */ + (code->tex.length > 0 ? 
code->tex.length + tex_iterations : 0) + + imm_count * 5; NEW_CB(shader->cb_code, shader->cb_code_size); - if (r300->screen->caps.is_r400) - OUT_CB_REG(R400_US_CODE_BANK, 0); - OUT_CB_REG(R300_US_CONFIG, code->config); OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); + if (code->r390_mode) { + OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext); + } else if (r300->screen->caps.is_r400) { + /* This register appears to affect shaders even if r390_mode is + * disabled, so it needs to be set to 0 for shaders that + * don't use r390_mode. */ + OUT_CB_REG(R400_US_CODE_EXT, 0); + } + OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4); OUT_CB_TABLE(code->code_addr, 4); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_inst); + do { + unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64); + unsigned int bank_alu_offset = bank * 64; + unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32); + unsigned int bank_tex_offset = bank * 32; + + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ? + (bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2 + } + + if (bank_alu_length > 0) { + OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst); + + OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr); + + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_addr); + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr); - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_inst); + if (code->r390_mode) { + OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr); + } + } + + if (bank_tex_length > 0) { + OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length); + OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length); + } - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_addr); + alu_length -= bank_alu_length; + tex_length -= bank_tex_length; + bank++; + } while(code->r390_mode && (alu_length > 0 || tex_length > 0)); - if (code->tex.length) { - OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - OUT_CB_TABLE(code->tex.inst, code->tex.length); + /* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders + * will be rendered incorrectly. */ + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, + code->r390_mode ? R400_R390_MODE_ENABLE : 0); } /* Emit immediates. 
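The bank bookkeeping above boils down to: one R400_US_CODE_BANK window exposes at most 64 ALU and 32 TEX instructions, so the number of iterations is the larger of the two rounded-up quotients. A worked example:

/* A shader with 150 ALU and 40 TEX instructions in r390_mode:
 *   alu_iterations = ((150 - 1) / 64) + 1 = 3
 *   tex_iterations = ((40  - 1) / 32) + 1 = 2
 *   iterations     = max of the two       = 3 banks,
 * emitted as 64/64/22 ALU and 32/8/0 TEX instructions per bank. */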
*/ @@ -384,17 +440,29 @@ static void r300_translate_fragment_shader( compiler.code = &shader->code; compiler.state = shader->compare_state; compiler.Base.is_r500 = r300->screen->caps.is_r500; + compiler.Base.is_r400 = r300->screen->caps.is_r400; compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT); compiler.Base.has_half_swizzles = TRUE; compiler.Base.has_presub = TRUE; - compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32; + compiler.Base.max_temp_regs = + compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32); compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; - compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; + compiler.Base.max_alu_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64; + compiler.Base.max_tex_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32; compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = &shader->inputs; find_output_registers(&compiler, shader); + shader->write_all = FALSE; + for (i = 0; i < shader->info.num_properties; i++) { + if (shader->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { + shader->write_all = TRUE; + } + } + if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_FP, "r300: Initial fragment program\n"); tgsi_dump(tokens, 0); @@ -407,6 +475,13 @@ static void r300_translate_fragment_shader( r300_tgsi_to_rc(&ttr, tokens); + if (ttr.error) { + fprintf(stderr, "r300 FP: Cannot translate a shader. " + "Using a dummy shader instead.\n"); + r300_dummy_fragment_shader(r300, shader); + return; + } + if (!r300->screen->caps.is_r500 || compiler.Base.Program.Constants.Count > 200) { compiler.Base.remove_unused_constants = TRUE; diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 51bfa88c5e..c86a90b85a 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -54,6 +54,9 @@ struct r300_fragment_shader_code { uint32_t *cb_code; struct r300_fragment_shader_code* next; + + boolean write_all; + }; struct r300_fragment_shader { @@ -81,4 +84,10 @@ static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_sha return (fs->shader->code.writes_depth) ? TRUE : FALSE; } +static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs) +{ + if (!fs) + return FALSE; + return (fs->shader->write_all) ? TRUE : FALSE; +} #endif /* R300_FS_H */ diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index c22e307c67..ecaadf4af8 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -22,7 +22,6 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "r300_context.h" -#include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" #include "r300_winsys.h" @@ -41,58 +40,74 @@ /* The HyperZ setup */ /*****************************************************************************/ -static bool r300_get_sc_hz_max(struct r300_context *r300) +static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_SC_HYPERZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; - if (func >= R300_ZS_GEQUAL && func <= R300_ZS_ALWAYS) - ret = R300_SC_HYPERZ_MAX; - return ret; + if (!dsa->dsa.depth.enabled || !dsa->dsa.depth.writemask) + return HIZ_FUNC_NONE; + + switch (dsa->dsa.depth.func) { + case PIPE_FUNC_NEVER: + case PIPE_FUNC_EQUAL: + case PIPE_FUNC_NOTEQUAL: + case PIPE_FUNC_ALWAYS: + return HIZ_FUNC_NONE; + + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + return HIZ_FUNC_MAX; + + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + return HIZ_FUNC_MIN; + + default: + assert(0); + return HIZ_FUNC_NONE; + } } -static bool r300_zfunc_same_direction(int func1, int func2) +/* Return what's used for the depth test (either minimum or maximum). */ +static unsigned r300_get_sc_hz_max(struct r300_context *r300) { - /* func1 is less/lessthan */ - if ((func1 == R300_ZS_LESS || func1 == R300_ZS_LEQUAL) && - (func2 == R300_ZS_EQUAL || func2 == R300_ZS_GEQUAL || - func2 == R300_ZS_GREATER)) - return FALSE; - - /* func1 is greater/greaterthan */ - if ((func1 == R300_ZS_GEQUAL || func1 == R300_ZS_GREATER) && - (func2 == R300_ZS_LESS || func2 == R300_ZS_LEQUAL)) - return FALSE; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; - return TRUE; + return func >= PIPE_FUNC_GREATER ? 
R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN; } -static int r300_get_hiz_min(struct r300_context *r300) +static boolean r300_is_hiz_func_valid(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_HIZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; - if (func == R300_ZS_LESS || func == R300_ZS_LEQUAL) - ret = R300_HIZ_MAX; - return ret; + if (r300->hiz_func == HIZ_FUNC_NONE) + return TRUE; + + /* func1 is less/lessthan */ + if (r300->hiz_func == HIZ_FUNC_MAX && + (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER)) + return FALSE; + + /* func1 is greater/greaterthan */ + if (r300->hiz_func == HIZ_FUNC_MIN && + (func == PIPE_FUNC_LESS || func == PIPE_FUNC_LEQUAL)) + return FALSE; + + return TRUE; } static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) { - if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || - s->zfail_op != PIPE_STENCIL_OP_KEEP)) - return TRUE; - return FALSE; + return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP); } static boolean r300_can_hiz(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; - struct r300_screen* r300screen = r300->screen; - struct r300_hyperz_state *z = r300->hyperz_state.state; + struct r300_dsa_state *dsa = r300->dsa_state.state; + struct r300_screen *r300screen = r300->screen; /* shader writes depth - no HiZ */ if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ @@ -100,34 +115,21 @@ static boolean r300_can_hiz(struct r300_context *r300) if (r300->query_current) return FALSE; + /* if stencil fail/zfail op is not KEEP */ - if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || - r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) || + r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1])) return FALSE; - if (dsa->depth.enabled) { + if (dsa->dsa.depth.enabled) { /* if depth func is EQUAL pre-r500 */ - if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) return FALSE; + /* if depth func is NOTEQUAL */ - if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL) return FALSE; } - /* depth comparison function - if just cleared save and return okay */ - if (z->current_func == -1) { - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - if (func != 0 && func != 7) - z->current_func = dsa_state->z_stencil_control & R300_ZS_MASK; - } else { - /* simple don't change */ - if (!r300_zfunc_same_direction(z->current_func, - (dsa_state->z_stencil_control & R300_ZS_MASK))) { - DBG(r300, DBG_HYPERZ, - "z func changed direction - disabling hyper-z %d -> %d\n", - z->current_func, dsa_state->z_stencil_control); - return FALSE; - } - } return TRUE; } @@ -137,10 +139,8 @@ static void r300_update_hyperz(struct r300_context* r300) (struct r300_hyperz_state*)r300->hyperz_state.state; struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_texture *zstex = - fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; - boolean zmask_in_use = FALSE; - boolean hiz_in_use = FALSE; + struct r300_resource *zstex = + fb->zsbuf ? 
r300_resource(fb->zsbuf->texture) : NULL; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -152,48 +152,54 @@ static void r300_update_hyperz(struct r300_context* r300) return; } - if (!zstex) - return; - - if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + if (!zstex || + !r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level]; - hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; - - /* Z fastfill. */ - if (zmask_in_use) { - z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ - } - /* Zbuffer compression. */ - if (zmask_in_use && r300->z_compression) { - z->zb_bw_cntl |= R300_RD_COMP_ENABLE; - if (r300->z_decomp_rd == false) + if (r300->zmask_in_use && !r300->hyperz_locked) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | + /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ + R300_RD_COMP_ENABLE; + + if (!r300->zmask_decompress) { z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + } } - /* RV350 and up optimizations. */ - /* The section 10.4.9 in the docs is a lie. */ - if (r300->z_compression == RV350_Z_COMPRESS_88) + + if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) { z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + } + + /* HiZ. */ + if (r300->hiz_in_use && !r300->hyperz_locked) { + /* Set the HiZ function if needed. */ + if (r300->hiz_func == HIZ_FUNC_NONE) { + r300->hiz_func = r300_get_hiz_func(r300); + } - if (hiz_in_use) { - bool can_hiz = r300_can_hiz(r300); - if (can_hiz) { - z->zb_bw_cntl |= R300_HIZ_ENABLE; - z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; - z->sc_hyperz |= r300_get_sc_hz_max(r300); - z->zb_bw_cntl |= r300_get_hiz_min(r300); + /* If the depth function is inverted, HiZ must be disabled. */ + if (!r300_is_hiz_func_valid(r300)) { + r300->hiz_in_use = FALSE; + } else if (r300_can_hiz(r300)) { + /* Setup the HiZ bits. */ + z->zb_bw_cntl |= + R300_HIZ_ENABLE | + (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX); + + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | + r300_get_sc_hz_max(r300); + + if (r300->screen->caps.is_r500) { + z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE; + } } } /* R500-specific features and optimizations. 
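The MIN/MAX selection above (and the mapping in r300_get_hiz_func) follows from HiZ being a conservative per-tile test: for LESS/LEQUAL the HiZ RAM has to hold the per-tile maximum, because a whole tile may only be skipped when every pixel in it is guaranteed to fail. A sketch of that rejection test, for illustration only:

static boolean hiz_tile_rejects_less(float incoming_z, float tile_max_z)
{
    /* With PIPE_FUNC_LESS a fragment passes only if z < stored z, so if the
     * incoming z is >= the largest z stored in the tile, nothing can pass. */
    return incoming_z >= tile_max_z;
}

/* For GREATER/GEQUAL the argument is mirrored, which is why the per-tile
 * minimum is kept instead, and why mixing the two directions forces HiZ off
 * (see r300_is_hiz_func_valid). */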
*/ if (r300->screen->caps.is_r500) { - z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; - z->zb_bw_cntl |= - R500_HIZ_EQUAL_REJECT_ENABLE | - R500_PEQ_PACKING_ENABLE | - R500_COVERED_PTR_MASKING_ENABLE; + z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; } } @@ -285,135 +291,11 @@ static void r300_update_ztop(struct r300_context* r300) r300_mark_atom_dirty(r300, &r300->ztop_state); } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - -static void r300_update_hiz_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - r300->hiz_clear.size = height * 4; -} - -static void r300_update_zmask_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - int mult; - - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; - - height = ALIGN_DIVUP(fb->zsbuf->height, mult); - - r300->zmask_clear.size = height * 4; -} - void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); + if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } - - if (r300->hiz_clear.dirty) { - r300_update_hiz_clear(r300); - } - if (r300->zmask_clear.dirty) { - r300_update_zmask_clear(r300); - } -} - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) -{ - struct r300_texture *tex; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - - tex = r300_texture(surf->base.texture); - - if (tex->hiz_mem[level]) - return; - - zsize = tex->desc.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->desc.b.b.format); - ndw = ALIGN_DIVUP(zsize, 64); - - tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); - return; -} - -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress) -{ - int bsize = 256; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - struct r300_texture *tex; - - tex = r300_texture(surf->base.texture); - - /* We currently don't handle decompression for 3D textures and cubemaps - * correctly. */ - if (tex->desc.b.b.target != PIPE_TEXTURE_1D && - tex->desc.b.b.target != PIPE_TEXTURE_2D && - tex->desc.b.b.target != PIPE_TEXTURE_RECT) - return; - - /* Cannot flush zmask of 16-bit zbuffers. 
*/ - if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16) - return; - - if (tex->zmask_mem[level]) - return; - - zsize = tex->desc.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->desc.b.b.format); - - /* each zmask dword represents 16 4x4 blocks - which is 256 pixels - or 16 8x8 depending on the gb peq flag = 1024 pixels */ - if (compress == RV350_Z_COMPRESS_88) - bsize = 1024; - - ndw = ALIGN_DIVUP(zsize, bsize); - tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); - return; -} - -boolean r300_hyperz_init_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - int frag_pipes = r300screen->caps.num_frag_pipes; - - r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); - if (!r300->zmask_mm) - return FALSE; - - if (r300screen->caps.hiz_ram) { - r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); - if (!r300->hiz_mm) { - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; - return FALSE; - } - } - - return TRUE; -} - -void r300_hyperz_destroy_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - - if (r300screen->caps.hiz_ram) { - u_mmDestroy(r300->hiz_mm); - r300->hiz_mm = NULL; - } - - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h deleted file mode 100644 index 30a23ec649..0000000000 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2010 Marek Olšák <maraeo@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_HYPERZ_H -#define R300_HYPERZ_H - -struct r300_context; - -void r300_update_hyperz_state(struct r300_context* r300); - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); - -boolean r300_hyperz_init_mm(struct r300_context *r300); -void r300_hyperz_destroy_mm(struct r300_context *r300); -#endif diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 6223e04321..717485f43c 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -57,10 +57,10 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, insert_at_tail(&r300->query_list, q); /* Open up the occlusion query buffer. 
*/ - q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, + q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, q->domain); - q->cs_buffer = r300->rws->buffer_get_cs_handle(r300->rws, q->buffer); + q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf); return (struct pipe_query*)q; } @@ -68,10 +68,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, static void r300_destroy_query(struct pipe_context* pipe, struct pipe_query* query) { - struct r300_context *r300 = r300_context(pipe); struct r300_query* q = r300_query(query); - r300->rws->buffer_reference(r300->rws, &q->buffer, NULL); + r300_winsys_bo_reference(&q->buf, NULL); remove_from_list(q); FREE(query); } @@ -128,16 +127,12 @@ static boolean r300_get_query_result(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_query *q = r300_query(query); - unsigned flags, i; + unsigned i; uint32_t temp, *map; - uint64_t *result = (uint64_t*)vresult; - - if (!q->flushed) - pipe->flush(pipe, 0, NULL); - - flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0); - map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags); + map = r300->rws->buffer_map(q->buf, r300->cs, + PIPE_TRANSFER_READ | + (!wait ? PIPE_TRANSFER_DONTBLOCK : 0)); if (!map) return FALSE; @@ -148,9 +143,9 @@ static boolean r300_get_query_result(struct pipe_context* pipe, map++; } - r300->rws->buffer_unmap(r300->rws, q->buffer); + r300->rws->buffer_unmap(q->buf); - *result = temp; + *((uint64_t*)vresult) = temp; return TRUE; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 613186e815..bb30b1ab0b 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -467,6 +467,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view */ +#define R500_VAP_TEX_TO_COLOR_CNTL 0x2218 + #define R300_VAP_CLIP_CNTL 0x221C # define R300_VAP_UCP_ENABLE_0 (1 << 0) # define R300_VAP_UCP_ENABLE_1 (1 << 1) @@ -857,6 +859,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_TX_DIRECTION_HORIZONTAL (0<<27) # define R500_TX_DIRECTION_VERITCAL (1<<27) +#define R500_SU_TEX_WRAP_PS3 0x4114 + /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ #define R300_GA_POINT_S0 0x4200 @@ -2162,14 +2166,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* R4xx extended fragment shader registers. */ #define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ -# define R400_ADDR0_EXT_RGB_MSB_BIT 0x01 -# define R400_ADDR1_EXT_RGB_MSB_BIT 0x02 -# define R400_ADDR2_EXT_RGB_MSB_BIT 0x04 +# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) # define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 -# define R400_ADDR0_EXT_A_MSB_BIT 0x10 -# define R400_ADDR1_EXT_A_MSB_BIT 0x20 -# define R400_ADDR2_EXT_A_MSB_BIT 0x40 +# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) # define R400_ADDRD_EXT_A_MSB_BIT 0x80 + #define R400_US_CODE_BANK 0x46b8 # define R400_BANK_SHIFT 0 # define R400_BANK_MASK 0xf @@ -2631,8 +2632,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
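Since r300_emit_query_end_frag_pipes above stores one ZPASS count per fragment pipe at consecutive dwords, the readback in r300_get_query_result is just a sum over those dwords. A sketch of that accumulation (the helper name and explicit pipe-count parameter are illustrative):

static uint64_t sum_zpass_counts(const uint32_t *map, unsigned num_frag_pipes)
{
    uint64_t result = 0;
    unsigned i;

    /* One dword per fragment pipe, written via the per-pipe ZB_ZPASS_ADDR
     * relocations emitted at query end. */
    for (i = 0; i < num_frag_pipes; i++)
        result += map[i];

    return result;
}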
#define R300_ZB_BW_CNTL 0x4f1c # define R300_HIZ_DISABLE (0 << 0) # define R300_HIZ_ENABLE (1 << 0) -# define R300_HIZ_MIN (0 << 1) -# define R300_HIZ_MAX (1 << 1) +# define R300_HIZ_MAX (0 << 1) +# define R300_HIZ_MIN (1 << 1) # define R300_FAST_FILL_DISABLE (0 << 2) # define R300_FAST_FILL_ENABLE (1 << 2) # define R300_RD_COMP_DISABLE (0 << 3) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b4197e0352..26594dabe4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -39,7 +39,6 @@ #include "r300_screen_buffer.h" #include "r300_emit.h" #include "r300_reg.h" -#include "r300_state_derived.h" #include <limits.h> @@ -128,16 +127,30 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias) END_CS; } +static void r300_emit_draw_init(struct r300_context *r300, unsigned mode, + unsigned min_index, unsigned max_index) +{ + CS_LOCALS(r300); + + BEGIN_CS(5); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(max_index); + OUT_CS(min_index); + END_CS; +} + /* This function splits the index bias value into two parts: * - buffer_offset: the value that can be safely added to buffer offsets - * in r300_emit_aos (it must yield a positive offset when added to + * in r300_emit_vertex_arrays (it must yield a positive offset when added to * a vertex buffer offset) * - index_offset: the value that must be manually subtracted from indices * in an index buffer to achieve negative offsets. */ static void r300_split_index_bias(struct r300_context *r300, int index_bias, int *buffer_offset, int *index_offset) { - struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer; + struct pipe_vertex_buffer *vb, *vbufs = r300->vbuf_mgr->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; unsigned i, size; int max_neg_bias; @@ -164,10 +177,10 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias, } enum r300_prepare_flags { - PREP_FIRST_DRAW = (1 << 0), /* call emit_dirty_state and friends? */ + PREP_EMIT_STATES = (1 << 0), /* call emit_dirty_state and friends? */ PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */ - PREP_EMIT_AOS = (1 << 2), /* call emit_aos? */ - PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_aos_swtcl? */ + PREP_EMIT_AOS = (1 << 2), /* call emit_vertex_arrays? */ + PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */ PREP_INDEXED = (1 << 4) /* is this draw_elements? */ }; @@ -180,33 +193,33 @@ enum r300_prepare_flags { * \return TRUE if the CS was flushed */ static boolean r300_reserve_cs_dwords(struct r300_context *r300, - enum r300_prepare_flags flags, - unsigned cs_dwords) + enum r300_prepare_flags flags, + unsigned cs_dwords) { boolean flushed = FALSE; - boolean first_draw = flags & PREP_FIRST_DRAW; - boolean emit_aos = flags & PREP_EMIT_AOS; - boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL; + boolean first_draw = flags & PREP_EMIT_STATES; + boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; /* Add dirty state, index offset, and AOS. 
*/ if (first_draw) { cs_dwords += r300_get_num_dirty_dwords(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) cs_dwords += 2; /* emit_index_offset */ - if (emit_aos) - cs_dwords += 55; /* emit_aos */ + if (emit_vertex_arrays) + cs_dwords += 55; /* emit_vertex_arrays */ - if (emit_aos_swtcl) - cs_dwords += 7; /* emit_aos_swtcl */ + if (emit_vertex_arrays_swtcl) + cs_dwords += 7; /* emit_vertex_arrays_swtcl */ } cs_dwords += r300_get_num_cs_end_dwords(r300); /* Reserve requested CS space. */ if (cs_dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { - r300->context.flush(&r300->context, 0, NULL); + r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); flushed = TRUE; } @@ -218,57 +231,55 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, * \param r300 The context. * \param flags See r300_prepare_flags. * \param index_buffer The index buffer to validate. The parameter may be NULL. - * \param aos_offset The offset passed to emit_aos. + * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. + * \param instance_id Index of instance to render * \return TRUE if rendering should be skipped */ static boolean r300_emit_states(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, - int aos_offset, - int index_bias) + int buffer_offset, + int index_bias, int instance_id) { - boolean first_draw = flags & PREP_FIRST_DRAW; - boolean emit_aos = flags & PREP_EMIT_AOS; - boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL; + boolean first_draw = flags & PREP_EMIT_STATES; + boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; + boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; boolean indexed = flags & PREP_INDEXED; boolean validate_vbos = flags & PREP_VALIDATE_VBOS; /* Validate buffers and emit dirty state if needed. */ if (first_draw) { - /* upload buffers first */ - if (r300->screen->caps.has_tcl && r300->any_user_vbs) { - r300_upload_user_buffers(r300); - r300->any_user_vbs = false; - } - - if (r300->validate_buffers) { - if (!r300_emit_buffer_validate(r300, validate_vbos, - index_buffer)) { - fprintf(stderr, "r300: CS space validation failed. " - "(not enough memory?) Skipping rendering.\n"); - return FALSE; - } - - /* Consider the validation done only if everything was validated. */ - if (validate_vbos) { - r300->validate_buffers = FALSE; - } + if (!r300_emit_buffer_validate(r300, validate_vbos, + index_buffer)) { + fprintf(stderr, "r300: CS space validation failed. " + "(not enough memory?) 
Skipping rendering.\n"); + return FALSE; } r300_emit_dirty_state(r300); - if (r300->screen->caps.index_bias_supported) { + if (r300->screen->caps.is_r500) { if (r300->screen->caps.has_tcl) r500_emit_index_bias(r300, index_bias); else r500_emit_index_bias(r300, 0); } - if (emit_aos) - r300_emit_aos(r300, aos_offset, indexed); + if (emit_vertex_arrays && + (r300->vertex_arrays_dirty || + r300->vertex_arrays_indexed != indexed || + r300->vertex_arrays_offset != buffer_offset || + r300->vertex_arrays_instance_id != instance_id)) { + r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id); + + r300->vertex_arrays_dirty = FALSE; + r300->vertex_arrays_indexed = indexed; + r300->vertex_arrays_offset = buffer_offset; + r300->vertex_arrays_instance_id = instance_id; + } - if (emit_aos_swtcl) - r300_emit_aos_swtcl(r300, indexed); + if (emit_vertex_arrays_swtcl) + r300_emit_vertex_arrays_swtcl(r300, indexed); } return TRUE; @@ -281,28 +292,32 @@ static boolean r300_emit_states(struct r300_context *r300, * \param flags See r300_prepare_flags. * \param index_buffer The index buffer to validate. The parameter may be NULL. * \param cs_dwords The number of dwords to reserve in CS. - * \param aos_offset The offset passed to emit_aos. + * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. + * \param instance_id The instance to render. * \return TRUE if rendering should be skipped */ static boolean r300_prepare_for_rendering(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, unsigned cs_dwords, - int aos_offset, - int index_bias) + int buffer_offset, + int index_bias, + int instance_id) { + /* Make sure there is enough space in the command stream and emit states. */ if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) - flags |= PREP_FIRST_DRAW; + flags |= PREP_EMIT_STATES; - return r300_emit_states(r300, flags, index_buffer, aos_offset, index_bias); + return r300_emit_states(r300, flags, index_buffer, buffer_offset, + index_bias, instance_id); } static boolean immd_is_good_idea(struct r300_context *r300, unsigned count) { struct pipe_vertex_element* velem; - struct pipe_vertex_buffer* vbuf; + struct pipe_resource *buf; boolean checked[PIPE_MAX_ATTRIBS] = {0}; unsigned vertex_element_count = r300->velems->count; unsigned i, vbi; @@ -326,18 +341,12 @@ static boolean immd_is_good_idea(struct r300_context *r300, vbi = velem->vertex_buffer_index; if (!checked[vbi]) { - vbuf = &r300->vertex_buffer[vbi]; + buf = r300->vbuf_mgr->real_vertex_buffer[vbi]; - if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) { + if ((r300_resource(buf)->domain != R300_DOMAIN_GTT)) { return FALSE; } - if (r300_buffer_is_referenced(&r300->context, - vbuf->buffer, - R300_REF_CS | R300_REF_HW)) { - /* It's a very bad idea to map it... */ - return FALSE; - } checked[vbi] = TRUE; } } @@ -348,10 +357,8 @@ static boolean immd_is_good_idea(struct r300_context *r300, * The HWTCL draw functions. 
* ****************************************************************************/ -static void r300_emit_draw_arrays_immediate(struct r300_context *r300, - unsigned mode, - unsigned start, - unsigned count) +static void r300_draw_arrays_immediate(struct r300_context *r300, + const struct pipe_draw_info *info) { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; @@ -362,7 +369,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, unsigned vertex_size = r300->velems->vertex_size_dwords; /* The number of dwords for this draw operation. */ - unsigned dwords = 9 + count * vertex_size; + unsigned dwords = 4 + info->count * vertex_size; /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; @@ -372,47 +379,42 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, unsigned stride[PIPE_MAX_ATTRIBS]; /* Mapped vertex buffers. */ - uint32_t* map[PIPE_MAX_ATTRIBS]; + uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; uint32_t* mapelem[PIPE_MAX_ATTRIBS]; - struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0}; CS_LOCALS(r300); - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) return; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; - size[i] = r300->velems->hw_format_size[i] / 4; + size[i] = r300->velems->format_size[i] / 4; vbi = velem->vertex_buffer_index; - vbuf = &r300->vertex_buffer[vbi]; + vbuf = &r300->vbuf_mgr->vertex_buffer[vbi]; stride[i] = vbuf->stride / 4; /* Map the buffer. */ - if (!transfer[vbi]) { - map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, - vbuf->buffer, - PIPE_TRANSFER_READ, - &transfer[vbi]); - map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; + if (!map[vbi]) { + map[vbi] = (uint32_t*)r300->rws->buffer_map( + r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf, + r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED); + map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start; } mapelem[i] = map[vbi] + (velem->src_offset / 4); } + r300_emit_draw_init(r300, info->mode, 0, info->count-1); + BEGIN_CS(dwords); - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | - r300_translate_primitive(mode)); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, info->count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (info->count << 16) | + r300_translate_primitive(info->mode)); /* Emit vertices. 
*/ - for (v = 0; v < count; v++) { + for (v = 0; v < info->count; v++) { for (i = 0; i < vertex_element_count; i++) { OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]); } @@ -423,10 +425,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, for (i = 0; i < vertex_element_count; i++) { vbi = r300->velems->velem[i].vertex_buffer_index; - if (transfer[vbi]) { - vbuf = &r300->vertex_buffer[vbi]; - pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]); - transfer[vbi] = NULL; + if (map[vbi]) { + r300->rws->buffer_unmap(r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf); + map[vbi] = NULL; } } } @@ -444,15 +445,12 @@ static void r300_emit_draw_arrays(struct r300_context *r300, return; } - BEGIN_CS(7 + (alt_num_verts ? 2 : 0)); + r300_emit_draw_init(r300, mode, 0, count-1); + + BEGIN_CS(2 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300_translate_primitive(mode) | @@ -463,37 +461,53 @@ static void r300_emit_draw_arrays(struct r300_context *r300, static void r300_emit_draw_elements(struct r300_context *r300, struct pipe_resource* indexBuffer, unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, + unsigned min_index, + unsigned max_index, unsigned mode, unsigned start, - unsigned count) + unsigned count, + uint16_t *imm_indices3) { - uint32_t count_dwords; - uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); + uint32_t count_dwords, offset_dwords; boolean alt_num_verts = count > 65535; CS_LOCALS(r300); - if (count >= (1 << 24)) { + if (count >= (1 << 24) || max_index >= (1 << 24)) { fprintf(stderr, "r300: Got a huge number of vertices: %i, " - "refusing to render.\n", count); + "refusing to render (max_index: %i).\n", count, max_index); return; } - maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index); - DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", - count, minIndex, maxIndex); + count, min_index, max_index); + + r300_emit_draw_init(r300, mode, min_index, max_index); + + /* If start is odd, render the first triangle with indices embedded + * in the command stream. This will increase start by 3 and make it + * even. We can then proceed without a fallback. */ + if (indexSize == 2 && (start & 1) && + mode == PIPE_PRIM_TRIANGLES) { + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 2); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (3 << 16) | + R300_VAP_VF_CNTL__PRIM_TRIANGLES); + OUT_CS(imm_indices3[1] << 16 | imm_indices3[0]); + OUT_CS(imm_indices3[2]); + END_CS; - BEGIN_CS(13 + (alt_num_verts ? 2 : 0)); + start += 3; + count -= 3; + if (!count) + return; + } + + offset_dwords = indexSize * start / sizeof(uint32_t); + + BEGIN_CS(8 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(maxIndex); - OUT_CS(minIndex); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { count_dwords = count; @@ -508,68 +522,169 @@ static void r300_emit_draw_elements(struct r300_context *r300, (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); } - /* INDX_BUFFER is a truly special packet3. 
- * Unlike most other packet3, where the offset is after the count, - * the order is reversed, so the relocation ends up carrying the - * size of the indexbuf instead of the offset. - */ OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_BUF_RELOC(indexBuffer, count_dwords, - r300_buffer(indexBuffer)->domain, 0); + OUT_CS(count_dwords); + OUT_CS_RELOC(r300_resource(indexBuffer)); + END_CS; +} + +static void r300_draw_elements_immediate(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + uint8_t *ptr1; + uint16_t *ptr2; + uint32_t *ptr4; + unsigned index_size = r300->index_buffer.index_size; + unsigned i, count_dwords = index_size == 4 ? info->count : + (info->count + 1) / 2; + CS_LOCALS(r300); + + /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */ + if (!r300_prepare_for_rendering(r300, + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1)) + return; + + r300_emit_draw_init(r300, info->mode, info->min_index, info->max_index); + + BEGIN_CS(2 + count_dwords); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords); + switch (index_size) { + case 1: + ptr1 = r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr1 += info->start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | + r300_translate_primitive(info->mode)); + + if (info->index_bias && !r300->screen->caps.is_r500) { + for (i = 0; i < info->count-1; i += 2) + OUT_CS(((ptr1[i+1] + info->index_bias) << 16) | + (ptr1[i] + info->index_bias)); + + if (info->count & 1) + OUT_CS(ptr1[i] + info->index_bias); + } else { + for (i = 0; i < info->count-1; i += 2) + OUT_CS(((ptr1[i+1]) << 16) | + (ptr1[i] )); + + if (info->count & 1) + OUT_CS(ptr1[i]); + } + break; + + case 2: + ptr2 = (uint16_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr2 += info->start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | + r300_translate_primitive(info->mode)); + + if (info->index_bias && !r300->screen->caps.is_r500) { + for (i = 0; i < info->count-1; i += 2) + OUT_CS(((ptr2[i+1] + info->index_bias) << 16) | + (ptr2[i] + info->index_bias)); + + if (info->count & 1) + OUT_CS(ptr2[i] + info->index_bias); + } else { + OUT_CS_TABLE(ptr2, count_dwords); + } + break; + + case 4: + ptr4 = (uint32_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr4 += info->start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit | + r300_translate_primitive(info->mode)); + + if (info->index_bias && !r300->screen->caps.is_r500) { + for (i = 0; i < info->count; i++) + OUT_CS(ptr4[i] + info->index_bias); + } else { + OUT_CS_TABLE(ptr4, count_dwords); + } + break; + } END_CS; } -/* This is the fast-path drawing & emission for HW TCL. 
*/ -static void r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +static void r300_draw_elements(struct r300_context *r300, + const struct pipe_draw_info *info, + int instance_id) { - struct r300_context* r300 = r300_context(pipe); + struct pipe_resource *indexBuffer = r300->index_buffer.buffer; + unsigned indexSize = r300->index_buffer.index_size; struct pipe_resource* orgIndexBuffer = indexBuffer; + unsigned start = info->start; + unsigned count = info->count; boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + count > 65536; unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ - unsigned new_offset; + uint16_t indices3[3]; - if (indexBias && !r300->screen->caps.index_bias_supported) { - r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); + if (info->index_bias && !r300->screen->caps.is_r500) { + r300_split_index_bias(r300, info->index_bias, &buffer_offset, &index_offset); } r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset, &start, count); - r300_update_derived_state(r300); - r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count, &new_offset); - - start = new_offset; + /* Fallback for misaligned ushort indices. */ + if (indexSize == 2 && (start & 1) && + !r300_resource(indexBuffer)->b.user_ptr) { + /* If we got here, then orgIndexBuffer == indexBuffer. */ + uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->buf, + r300->cs, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED); + + if (info->mode == PIPE_PRIM_TRIANGLES) { + memcpy(indices3, ptr + start, 6); + } else { + /* Copy the mapped index buffer directly to the upload buffer. + * The start index will be aligned simply from the fact that + * every sub-buffer in the upload buffer is aligned. */ + r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, + count, (uint8_t*)ptr); + } + r300->rws->buffer_unmap(r300_resource(orgIndexBuffer)->buf); + } else { + if (r300_resource(indexBuffer)->b.user_ptr) + r300_upload_index_buffer(r300, &indexBuffer, indexSize, + &start, count, + r300_resource(indexBuffer)->b.user_ptr); + } - /* 15 dwords for emit_draw_elements. Give up if the function fails. */ + /* 19 dwords for emit_draw_elements. Give up if the function fails. 
*/ if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, indexBuffer, 15, buffer_offset, indexBias)) + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias, + instance_id)) goto done; if (alt_num_verts || count <= 65535) { - r300_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, mode, start, count); + r300_emit_draw_elements(r300, indexBuffer, indexSize, info->min_index, + info->max_index, info->mode, start, count, + indices3); } else { do { - short_count = MIN2(count, 65534); + if (indexSize == 2 && (start & 1)) + short_count = MIN2(count, 65535); + else + short_count = MIN2(count, 65534); + r300_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, - mode, start, short_count); + info->min_index, info->max_index, + info->mode, start, short_count, indices3); start += short_count; count -= short_count; @@ -578,7 +693,8 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 15, buffer_offset, indexBias)) + indexBuffer, 19, buffer_offset, info->index_bias, + instance_id)) goto done; } } while (count); @@ -590,107 +706,112 @@ done: } } -static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) +static void r300_draw_arrays(struct r300_context *r300, + const struct pipe_draw_info *info, + int instance_id) { - struct r300_context* r300 = r300_context(pipe); boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + info->count > 65536; + unsigned start = info->start; + unsigned count = info->count; unsigned short_count; - r300_update_derived_state(r300); + /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ + if (!r300_prepare_for_rendering(r300, + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, + NULL, 9, start, 0, instance_id)) + return; - if (immd_is_good_idea(r300, count)) { - r300_emit_draw_arrays_immediate(r300, mode, start, count); + if (alt_num_verts || count <= 65535) { + r300_emit_draw_arrays(r300, info->mode, count); } else { - /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ - if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0)) - return; + do { + short_count = MIN2(count, 65535); + r300_emit_draw_arrays(r300, info->mode, short_count); - if (alt_num_verts || count <= 65535) { - r300_emit_draw_arrays(r300, mode, count); - } else { - do { - short_count = MIN2(count, 65535); - r300_emit_draw_arrays(r300, mode, short_count); - - start += short_count; - count -= short_count; - - /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ - if (count) { - if (!r300_prepare_for_rendering(r300, - PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0)) - return; - } - } while (count); - } + start += short_count; + count -= short_count; + + /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. 
*/ + if (count) { + if (!r300_prepare_for_rendering(r300, + PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, + start, 0, instance_id)) + return; + } + } while (count); } } +static void r300_draw_arrays_instanced(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + int i; + + for (i = 0; i < info->instance_count; i++) + r300_draw_arrays(r300, info, i); +} + +static void r300_draw_elements_instanced(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + int i; + + for (i = 0; i < info->instance_count; i++) + r300_draw_elements(r300, info, i); +} + static void r300_draw_vbo(struct pipe_context* pipe, - const struct pipe_draw_info *info) + const struct pipe_draw_info *dinfo) { struct r300_context* r300 = r300_context(pipe); - unsigned count = info->count; - boolean translate = FALSE; - boolean indexed = info->indexed && r300->index_buffer.buffer; - unsigned start_indexed = 0; + struct pipe_draw_info info = *dinfo; + boolean buffers_updated, uploader_flushed; - if (r300->skip_rendering) { - return; - } + info.indexed = info.indexed && r300->index_buffer.buffer; - if (!u_trim_pipe_prim(info->mode, &count)) { + if (r300->skip_rendering || + !u_trim_pipe_prim(info.mode, &info.count)) { return; } - /* Index buffer range checking. */ - if (indexed) { - assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); - - /* Compute start for draw_elements, taking the offset into account. */ - start_indexed = - info->start + - (r300->index_buffer.offset / r300->index_buffer.index_size); - - if ((start_indexed + count) * r300->index_buffer.index_size > - r300->index_buffer.buffer->width0) { - fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n"); - return; - } - } + r300_update_derived_state(r300); - /* Set up fallback for incompatible vertex layout if needed. */ - if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { - r300_begin_vertex_translate(r300); - translate = TRUE; + /* Start the vbuf manager and update buffers if needed. */ + u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info, + &buffers_updated, &uploader_flushed); + if (buffers_updated) { + r300->vertex_arrays_dirty = TRUE; } - if (indexed) { - r300_draw_range_elements(pipe, - r300->index_buffer.buffer, - r300->index_buffer.index_size, - info->index_bias, - info->min_index, - info->max_index, - info->mode, - start_indexed, - count); + /* Draw. */ + if (info.indexed) { + info.start += r300->index_buffer.offset; + info.max_index = MIN2(r300->vbuf_mgr->max_index, info.max_index); + + if (info.instance_count <= 1) { + if (info.count <= 8 && + r300_resource(r300->index_buffer.buffer)->b.user_ptr) { + r300_draw_elements_immediate(r300, &info); + } else { + r300_draw_elements(r300, &info, -1); + } + } else { + r300_draw_elements_instanced(r300, &info); + } } else { - r300_draw_arrays(pipe, - info->mode, - info->start, - count); + if (info.instance_count <= 1) { + if (immd_is_good_idea(r300, info.count)) { + r300_draw_arrays_immediate(r300, &info); + } else { + r300_draw_arrays(r300, &info, -1); + } + } else { + r300_draw_arrays_instanced(r300, &info); + } } - if (translate) { - r300_end_vertex_translate(r300); - } + u_vbuf_mgr_draw_end(r300->vbuf_mgr); } /**************************************************************************** @@ -721,15 +842,16 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); r300_reserve_cs_dwords(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | (indexed ? 
PREP_INDEXED : 0), indexed ? 256 : 6); - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (r300->vertex_buffer[i].buffer) { + for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { + if (r300->vbuf_mgr->vertex_buffer[i].buffer) { void *buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, + r300->vbuf_mgr->vertex_buffer[i].buffer, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, &vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, buf); } @@ -737,7 +859,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, if (indexed) { indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, - PIPE_TRANSFER_READ, &ib_transfer); + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, &ib_transfer); } draw_set_mapped_index_buffer(r300->draw, indices); @@ -748,16 +871,15 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, draw_flush(r300->draw); r300->draw_vbo_locked = FALSE; - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (r300->vertex_buffer[i].buffer) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); + for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { + if (r300->vbuf_mgr->vertex_buffer[i].buffer) { + pipe_buffer_unmap(pipe, vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } } if (indexed) { - pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer); + pipe_buffer_unmap(pipe, ib_transfer); draw_set_mapped_index_buffer(r300->draw, NULL); } } @@ -813,10 +935,10 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, pipe_resource_reference(&r300->vbo, NULL); r300->vbo = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, R300_MAX_DRAW_VBO_SIZE); r300->draw_vbo_offset = 0; r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE; - r300->validate_buffers = TRUE; } r300render->vertex_size = vertex_size; @@ -835,7 +957,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context, r300->vbo, - PIPE_TRANSFER_WRITE, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED, &r300render->vbo_transfer); assert(r300render->vbo_ptr); @@ -857,7 +980,7 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, r300render->vbo_max_used = MAX2(r300render->vbo_max_used, r300render->vertex_size * (max + 1)); - pipe_buffer_unmap(context, r300->vbo, r300render->vbo_transfer); + pipe_buffer_unmap(context, r300render->vbo_transfer); r300render->vbo_transfer = NULL; } @@ -901,33 +1024,16 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 6, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + NULL, dwords, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + NULL, 0, 0, -1)) return; } - /* Uncomment to dump all VBOs rendered through this interface. - * Slow and noisy! 
- ptr = pipe_buffer_map(&r300render->r300->context, - r300render->vbo, PIPE_TRANSFER_READ, - &r300render->vbo_transfer); - - for (i = 0; i < count; i++) { - printf("r300: Vertex %d\n", i); - draw_dump_emitted_vertex(&r300->vertex_info, ptr); - ptr += r300->vertex_info.size * 4; - printf("\n"); - } - - pipe_buffer_unmap(&r300render->r300->context, r300render->vbo, - r300render->vbo_transfer); - */ - BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, r300render->prim)); @@ -958,13 +1064,13 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + NULL, 256, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + NULL, 0, 0, -1)) return; } @@ -1001,7 +1107,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + NULL, 256, 0, 0, -1)) return; end_cs_dwords = r300_get_num_cs_end_dwords(r300); @@ -1022,8 +1128,7 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300) r300render->r300 = r300; - /* XXX find real numbers plz */ - r300render->base.max_vertex_buffer_bytes = 128 * 1024; + r300render->base.max_vertex_buffer_bytes = 1024 * 1024; r300render->base.max_indices = 16 * 1024; r300render->base.get_vertex_info = r300_render_get_vertex_info; @@ -1106,7 +1211,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) goto done; DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); @@ -1190,7 +1295,7 @@ static void r300_resource_resolve(struct pipe_context* pipe, aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; - r300->aa_state.size = 12; + r300->aa_state.size = 10; r300_mark_atom_dirty(r300, &r300->aa_state); /* Resolve the surface. */ diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index 41a43b04de..f8c7558f4b 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -20,203 +20,64 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - * The functions below translate vertex and index buffers to the layout - * compatible with the hardware, so that all vertex and index fetches are - * DWORD-aligned and all used vertex and index formats are supported. - * For indices, an optional index offset is added to each index. 
- */ - #include "r300_context.h" -#include "translate/translate.h" #include "util/u_index_modify.h" +#include "util/u_upload_mgr.h" -void r300_begin_vertex_translate(struct r300_context *r300) -{ - struct pipe_context *pipe = &r300->context; - struct translate_key key = {0}; - struct translate_element *te; - unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; - struct translate *tr; - struct r300_vertex_element_state *ve = r300->velems; - boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; - struct pipe_resource *out_buffer; - unsigned i, num_verts; - - /* Initialize the translate key, i.e. the recipe how vertices should be - * translated. */ - for (i = 0; i < ve->count; i++) { - struct pipe_vertex_buffer *vb = - &r300->vertex_buffer[ve->velem[i].vertex_buffer_index]; - enum pipe_format output_format = ve->hw_format[i]; - unsigned output_format_size = ve->hw_format_size[i]; - - /* Check for support. */ - if (ve->velem[i].src_format == ve->hw_format[i] && - (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 && - vb->stride % 4 == 0) { - continue; - } - - /* Workaround for translate: output floats instead of halfs. */ - switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; - } - - /* Add this vertex element. */ - te = &key.element[key.nr_elements]; - /*te->type; - te->instance_divisor;*/ - te->input_buffer = ve->velem[i].vertex_buffer_index; - te->input_format = ve->velem[i].src_format; - te->input_offset = vb->buffer_offset + ve->velem[i].src_offset; - te->output_format = output_format; - te->output_offset = key.output_stride; - - key.output_stride += output_format_size; - vb_translated[ve->velem[i].vertex_buffer_index] = TRUE; - tr_elem_index[i] = key.nr_elements; - key.nr_elements++; - } - - /* Get a translate object. */ - tr = translate_cache_find(r300->tran.translate_cache, &key); - - /* Map buffers we want to translate. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (vb_translated[i]) { - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; - - vb_map[i] = pipe_buffer_map(pipe, vb->buffer, - PIPE_TRANSFER_READ, &vb_transfer[i]); - - tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); - } - } - - /* Create and map the output buffer. */ - num_verts = r300->vertex_buffer_max_index + 1; - - out_buffer = pipe_buffer_create(&r300->screen->screen, - PIPE_BIND_VERTEX_BUFFER, - key.output_stride * num_verts); - - out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, - &out_transfer); - - /* Translate. */ - tr->run(tr, 0, num_verts, 0, out_map); - - /* Unmap all buffers. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (vb_translated[i]) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - } - } - - pipe_buffer_unmap(pipe, out_buffer, out_transfer); - - /* Setup the new vertex buffer in the first free slot. 
*/ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; - - if (!vb->buffer) { - pipe_resource_reference(&vb->buffer, out_buffer); - vb->buffer_offset = 0; - vb->max_index = num_verts - 1; - vb->stride = key.output_stride; - r300->tran.vb_slot = i; - r300->validate_buffers = TRUE; - break; - } - } - - /* Save and replace vertex elements. */ - { - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - - r300->tran.saved_velems = r300->velems; - - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->velem[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->velem[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = r300->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->velem[i], - sizeof(struct pipe_vertex_element)); - } - } - - r300->tran.new_velems = - pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems); - } - - pipe_resource_reference(&out_buffer, NULL); -} - -void r300_end_vertex_translate(struct r300_context *r300) -{ - struct pipe_context *pipe = &r300->context; - - /* Restore vertex elements. */ - pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems); - pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems); - - /* Delete the now-unused VBO. */ - pipe_resource_reference(&r300->vertex_buffer[r300->tran.vb_slot].buffer, - NULL); -} void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, unsigned *start, unsigned count) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + boolean flushed; + switch (*index_size) { - case 1: - util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count); - *index_size = 2; - *start = 0; - r300->validate_buffers = TRUE; - break; + case 1: + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_shorten_ubyte_elts_to_userptr( + &r300->context, *index_buffer, index_offset, + *start, count, ptr); + + *index_buffer = NULL; + pipe_resource_reference(index_buffer, out_buffer); + *index_size = 2; + *start = out_offset / 2; + break; + + case 2: + if (index_offset) { + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_rebuild_ushort_elts_to_userptr(&r300->context, *index_buffer, + index_offset, *start, + count, ptr); + + *index_buffer = NULL; + pipe_resource_reference(index_buffer, out_buffer); + *start = out_offset / 2; + } + break; - case 2: - if (*start % 2 != 0 || index_offset) { - util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count); - *start = 0; - r300->validate_buffers = TRUE; - } - break; + case 4: + if (index_offset) { + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 4, + &out_offset, &out_buffer, &flushed, &ptr); - case 4: - if (index_offset) { - util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count); - *start = 0; - r300->validate_buffers = TRUE; - } - break; + util_rebuild_uint_elts_to_userptr(&r300->context, *index_buffer, + index_offset, *start, + count, ptr); + + *index_buffer = NULL; + pipe_resource_reference(index_buffer, out_buffer); + *start = out_offset / 4; + } + break; } } diff --git a/src/gallium/drivers/r300/r300_resource.c 
b/src/gallium/drivers/r300/r300_resource.c index dd1df97059..6593474b1f 100644 --- a/src/gallium/drivers/r300/r300_resource.c +++ b/src/gallium/drivers/r300/r300_resource.c @@ -38,26 +38,14 @@ r300_resource_create(struct pipe_screen *screen, } -static struct pipe_resource * -r300_resource_from_handle(struct pipe_screen * screen, - const struct pipe_resource *templ, - struct winsys_handle *whandle) -{ - if (templ->target == PIPE_BUFFER) - return NULL; - else - return r300_texture_from_handle(screen, templ, whandle); -} - void r300_init_resource_functions(struct r300_context *r300) { r300->context.get_transfer = u_get_transfer_vtbl; r300->context.transfer_map = u_transfer_map_vtbl; - r300->context.transfer_flush_region = u_transfer_flush_region_vtbl; + r300->context.transfer_flush_region = u_default_transfer_flush_region; r300->context.transfer_unmap = u_transfer_unmap_vtbl; r300->context.transfer_destroy = u_transfer_destroy_vtbl; r300->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r300->context.is_resource_referenced = u_is_resource_referenced_vtbl; r300->context.create_surface = r300_create_surface; r300->context.surface_destroy = r300_surface_destroy; } @@ -65,8 +53,8 @@ void r300_init_resource_functions(struct r300_context *r300) void r300_init_screen_resource_functions(struct r300_screen *r300screen) { r300screen->screen.resource_create = r300_resource_create; - r300screen->screen.resource_from_handle = r300_resource_from_handle; - r300screen->screen.resource_get_handle = u_resource_get_handle_vtbl; + r300screen->screen.resource_from_handle = r300_texture_from_handle; + r300screen->screen.resource_get_handle = r300_resource_get_handle; r300screen->screen.resource_destroy = u_resource_destroy_vtbl; r300screen->screen.user_buffer_create = r300_user_buffer_create; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 921d6f1e67..8a69628c53 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -24,6 +24,7 @@ #include "util/u_format.h" #include "util/u_format_s3tc.h" #include "util/u_memory.h" +#include "os/os_time.h" #include "r300_context.h" #include "r300_texture.h" @@ -83,12 +84,8 @@ static const char* r300_get_name(struct pipe_screen* pscreen) static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { struct r300_screen* r300screen = r300_screen(pscreen); - boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (param) { /* Supported features (boolean caps). */ case PIPE_CAP_NPOT_TEXTURES: @@ -116,6 +113,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; case PIPE_CAP_TEXTURE_SWIZZLE: return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1; @@ -125,12 +123,17 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DUAL_SOURCE_BLEND: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ + case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_ARRAY_TEXTURES: + return 0; + + /* SWTCL-only features. 
*/ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: - return 0; + case PIPE_CAP_TGSI_INSTANCEID: + return !r300screen->caps.has_tcl; /* Texturing. */ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: @@ -171,9 +174,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (shader) { case PIPE_SHADER_FRAGMENT: @@ -306,10 +306,11 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned usage, - unsigned geom_flags) + unsigned usage) { + struct r300_winsys_screen *rws = r300_screen(screen)->rws; uint32_t retval = 0; + boolean drm_2_8_0 = rws->get_value(rws, R300_VID_DRM_2_8_0); boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -317,9 +318,13 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_B10G10R10A2_UNORM || format == PIPE_FORMAT_R10SG10SB10SA2U_NORM; boolean is_ati1n = format == PIPE_FORMAT_RGTC1_UNORM || - format == PIPE_FORMAT_RGTC1_SNORM; + format == PIPE_FORMAT_RGTC1_SNORM || + format == PIPE_FORMAT_LATC1_UNORM || + format == PIPE_FORMAT_LATC1_SNORM; boolean is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM || - format == PIPE_FORMAT_RGTC2_SNORM; + format == PIPE_FORMAT_RGTC2_SNORM || + format == PIPE_FORMAT_LATC2_UNORM || + format == PIPE_FORMAT_LATC2_SNORM; boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT || format == PIPE_FORMAT_R16G16_FLOAT || format == PIPE_FORMAT_R16G16B16_FLOAT || @@ -363,7 +368,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) && /* 2101010 cannot be rendered to on non-r5xx. */ - (is_r500 || !is_color2101010) && + (!is_color2101010 || (is_r500 && drm_2_8_0)) && r300_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | @@ -401,6 +406,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); util_slab_destroy(&r300screen->pool_buffers); + pipe_mutex_destroy(r300screen->num_contexts_mutex); if (rws) rws->destroy(rws); @@ -412,33 +418,44 @@ static void r300_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - struct r300_fence **oldf = (struct r300_fence**)ptr; - struct r300_fence *newf = (struct r300_fence*)fence; - - if (pipe_reference(&(*oldf)->reference, &newf->reference)) - FREE(*oldf); - - *ptr = fence; + r300_winsys_bo_reference((struct r300_winsys_bo**)ptr, + (struct r300_winsys_bo*)fence); } -static int r300_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) +static boolean r300_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence) { - struct r300_fence *rfence = (struct r300_fence*)fence; + struct r300_winsys_screen *rws = r300_screen(screen)->rws; + struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; - return rfence->signalled ? 
0 : 1; /* 0 == success */ + return !rws->buffer_is_busy(rfence); } -static int r300_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) +static boolean r300_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + uint64_t timeout) { - struct r300_fence *rfence = (struct r300_fence*)fence; + struct r300_winsys_screen *rws = r300_screen(screen)->rws; + struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; + + if (timeout != PIPE_TIMEOUT_INFINITE) { + int64_t start_time = os_time_get(); - r300_finish(rfence->ctx); - rfence->signalled = TRUE; - return 0; /* 0 == success */ + /* Convert to microseconds. */ + timeout /= 1000; + + /* Wait in a loop. */ + while (rws->buffer_is_busy(rfence)) { + if (os_time_get() - start_time >= timeout) { + return FALSE; + } + os_time_sleep(10); + } + return TRUE; + } + + rws->buffer_wait(rfence); + return TRUE; } struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) @@ -457,12 +474,15 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) r300_init_debug(r300screen); r300_parse_chipset(&r300screen->caps); - r300screen->caps.index_bias_supported = - r300screen->caps.is_r500 && - rws->get_value(rws, R300_VID_DRM_2_3_0); + if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK)) + r300screen->caps.zmask_ram = 0; + if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) + r300screen->caps.hiz_ram = 0; + + pipe_mutex_init(r300screen->num_contexts_mutex); util_slab_create(&r300screen->pool_buffers, - sizeof(struct r300_buffer), 64, + sizeof(struct r300_resource), 64, UTIL_SLAB_SINGLETHREADED); r300screen->rws = rws; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 5847fe1ffc..576f9c1f4a 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -52,6 +52,7 @@ struct r300_screen { /* The number of created contexts to know whether we have multiple * contexts or not. */ int num_contexts; + pipe_mutex num_contexts_mutex; }; @@ -93,6 +94,8 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_CBZB (1 << 11) #define DBG_HYPERZ (1 << 12) #define DBG_SCISSOR (1 << 13) +#define DBG_UPLOAD (1 << 14) +#define DBG_INFO (1 << 15) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) @@ -100,6 +103,8 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FAKE_OCC (1 << 19) #define DBG_NO_OPT (1 << 20) #define DBG_NO_CBZB (1 << 21) +#define DBG_NO_ZMASK (1 << 22) +#define DBG_NO_HIZ (1 << 23) /* Statistics. 
*/ #define DBG_P_STAT (1 << 25) /*@}*/ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 4436443522..986ae384fb 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -33,109 +33,36 @@ #include "r300_screen_buffer.h" #include "r300_winsys.h" -unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf, - enum r300_reference_domain domain) +void r300_upload_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned index_size, unsigned *start, + unsigned count, uint8_t *ptr) { - struct r300_context *r300 = r300_context(context); - struct r300_buffer *rbuf = r300_buffer(buf); + unsigned index_offset; + boolean flushed; - if (r300_buffer_is_user_buffer(buf)) - return PIPE_UNREFERENCED; + *index_buffer = NULL; - if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain)) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + u_upload_data(r300->vbuf_mgr->uploader, + 0, count * index_size, + ptr + (*start * index_size), + &index_offset, + index_buffer, &flushed); - return PIPE_UNREFERENCED; -} - -static unsigned r300_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer) -{ - return r300_buffer_is_referenced(context, buf, R300_REF_CS); -} - -/* External helper, not required to implent u_resource_vtbl: - */ -int r300_upload_index_buffer(struct r300_context *r300, - struct pipe_resource **index_buffer, - unsigned index_size, - unsigned start, - unsigned count, - unsigned *out_offset) -{ - struct pipe_resource *upload_buffer = NULL; - unsigned index_offset = start * index_size; - int ret = 0; - - if (r300_buffer_is_user_buffer(*index_buffer)) { - ret = u_upload_buffer(r300->upload_ib, - index_offset, - count * index_size, - *index_buffer, - &index_offset, - &upload_buffer); - if (ret) { - goto done; - } - *index_buffer = upload_buffer; - *out_offset = index_offset / index_size; - } else - *out_offset = start; - - done: - // if (upload_buffer) - // pipe_resource_reference(&upload_buffer, NULL); - return ret; -} - -/* External helper, not required to implement u_resource_vtbl: - */ -int r300_upload_user_buffers(struct r300_context *r300) -{ - enum pipe_error ret = PIPE_OK; - int i, nr; - - nr = r300->velems->count; - - for (i = 0; i < nr; i++) { - struct pipe_vertex_buffer *vb = - &r300->vertex_buffer[r300->velems->velem[i].vertex_buffer_index]; - - if (r300_buffer_is_user_buffer(vb->buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*vb->buffer_offset * 4;*/ - unsigned size = vb->buffer->width0; - unsigned upload_offset; - ret = u_upload_buffer(r300->upload_vb, - offset, size, - vb->buffer, - &upload_offset, &upload_buffer); - if (ret) - return ret; - - pipe_resource_reference(&vb->buffer, NULL); - vb->buffer = upload_buffer; - vb->buffer_offset = upload_offset; - r300->validate_buffers = TRUE; - } - } - return ret; + *start = index_offset / index_size; } static void r300_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf = r300_buffer(buf); - struct r300_winsys_screen *rws = r300screen->rws; + struct r300_resource *rbuf = r300_resource(buf); if (rbuf->constant_buffer) FREE(rbuf->constant_buffer); if (rbuf->buf) - rws->buffer_reference(rws, &rbuf->buf, NULL); + r300_winsys_bo_reference(&rbuf->buf, NULL); 
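/* Illustrative sketch (hypothetical names, assuming a plain refcount; not the
 * driver's real interface): r300_winsys_bo_reference() above follows the usual
 * Gallium "reference" idiom, where one helper retains the new object, releases
 * the old one, and passing NULL as the new object simply drops the caller's
 * reference. */
#include <stdlib.h>

struct example_bo {
    int refcount;     /* starts at 1 when the BO is created */
    void *storage;    /* the underlying allocation */
};

static void example_bo_destroy(struct example_bo *bo)
{
    free(bo->storage);
    free(bo);
}

static void example_bo_reference(struct example_bo **dst, struct example_bo *src)
{
    struct example_bo *old = *dst;

    if (old == src)
        return;
    if (src)
        src->refcount++;                  /* retain the new object first */
    if (old && --old->refcount == 0)      /* then drop the old reference */
        example_bo_destroy(old);
    *dst = src;   /* example_bo_reference(&p, NULL) therefore unreferences p */
}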
util_slab_free(&r300screen->pool_buffers, rbuf); } @@ -179,93 +106,31 @@ r300_buffer_transfer_map( struct pipe_context *pipe, struct r300_context *r300 = r300_context(pipe); struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; - struct r300_buffer *rbuf = r300_buffer(transfer->resource); + struct r300_resource *rbuf = r300_resource(transfer->resource); uint8_t *map; - boolean flush = FALSE; - unsigned i; - if (rbuf->user_buffer) - return (uint8_t *) rbuf->user_buffer + transfer->box.x; + if (rbuf->b.user_ptr) + return (uint8_t *) rbuf->b.user_ptr + transfer->box.x; if (rbuf->constant_buffer) return (uint8_t *) rbuf->constant_buffer + transfer->box.x; - /* check if the mapping is to a range we already flushed */ - if (transfer->usage & PIPE_TRANSFER_DISCARD) { - for (i = 0; i < rbuf->num_ranges; i++) { - if ((transfer->box.x >= rbuf->ranges[i].start) && - (transfer->box.x < rbuf->ranges[i].end)) - flush = TRUE; - - if (flush) { - /* unreference this hw buffer and allocate a new one */ - rws->buffer_reference(rws, &rbuf->buf, NULL); - - rbuf->num_ranges = 0; - rbuf->buf = - r300screen->rws->buffer_create(r300screen->rws, - rbuf->b.b.width0, 16, - rbuf->b.b.bind, - rbuf->b.b.usage, - rbuf->domain); - rbuf->cs_buf = - r300screen->rws->buffer_get_cs_handle(r300screen->rws, - rbuf->buf); - break; - } - } - } - - map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage); + map = rws->buffer_map(rbuf->buf, r300->cs, transfer->usage); if (map == NULL) return NULL; - /* map_buffer() returned a pointer to the beginning of the buffer, - * but transfers are expected to return a pointer to just the - * region specified in the box. - */ return map + transfer->box.x; } -static void r300_buffer_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - struct r300_buffer *rbuf = r300_buffer(transfer->resource); - unsigned i; - unsigned offset = transfer->box.x + box->x; - unsigned length = box->width; - - assert(box->x + box->width <= transfer->box.width); - - if (rbuf->user_buffer) - return; - if (rbuf->constant_buffer) - return; - - /* mark the range as used */ - for(i = 0; i < rbuf->num_ranges; ++i) { - if(offset <= rbuf->ranges[i].end && rbuf->ranges[i].start <= (offset+box->width)) { - rbuf->ranges[i].start = MIN2(rbuf->ranges[i].start, offset); - rbuf->ranges[i].end = MAX2(rbuf->ranges[i].end, (offset+length)); - return; - } - } - - rbuf->ranges[rbuf->num_ranges].start = offset; - rbuf->ranges[rbuf->num_ranges].end = offset+length; - rbuf->num_ranges++; -} - static void r300_buffer_transfer_unmap( struct pipe_context *pipe, struct pipe_transfer *transfer ) { struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; - struct r300_buffer *rbuf = r300_buffer(transfer->resource); + struct r300_resource *rbuf = r300_resource(transfer->resource); if (rbuf->buf) { - rws->buffer_unmap(rws, rbuf->buf); + rws->buffer_unmap(rbuf->buf); } } @@ -278,34 +143,33 @@ static void r300_buffer_transfer_inline_write(struct pipe_context *pipe, unsigned stride, unsigned layer_stride) { - struct r300_buffer *rbuf = r300_buffer(resource); - struct pipe_transfer *transfer = NULL; + struct r300_context *r300 = r300_context(pipe); + struct r300_winsys_screen *rws = r300->screen->rws; + struct r300_resource *rbuf = r300_resource(resource); uint8_t *map = NULL; if (rbuf->constant_buffer) { memcpy(rbuf->constant_buffer + box->x, data, box->width); return; } 
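/* Illustrative sketch (hypothetical names, not the real winsys API): the
 * inline-write path below maps the buffer with write and discard flags, copies
 * the user data at the requested offset and unmaps again; the discard flag lets
 * the winsys hand back fresh storage instead of stalling on a busy BO. The same
 * map/copy/unmap pattern in stand-alone form: */
#include <string.h>

struct example_winsys;                    /* opaque, assumed to exist */
void *example_buffer_map(struct example_winsys *ws, void *buf, unsigned flags);
void  example_buffer_unmap(struct example_winsys *ws, void *buf);

#define EXAMPLE_MAP_WRITE   (1u << 0)
#define EXAMPLE_MAP_DISCARD (1u << 1)

static int example_inline_write(struct example_winsys *ws, void *buf,
                                unsigned offset, const void *data, unsigned size)
{
    char *map = example_buffer_map(ws, buf,
                                   EXAMPLE_MAP_WRITE | EXAMPLE_MAP_DISCARD);
    if (!map)
        return -1;                        /* mapping may fail */
    memcpy(map + offset, data, size);
    example_buffer_unmap(ws, buf);
    return 0;
}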
+ assert(rbuf->b.user_ptr == NULL); - transfer = r300_buffer_get_transfer(pipe, resource, 0, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, box); - map = r300_buffer_transfer_map(pipe, transfer); + map = rws->buffer_map(rbuf->buf, r300->cs, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage); - memcpy(map, data, box->width); + memcpy(map + box->x, data, box->width); - r300_buffer_transfer_unmap(pipe, transfer); - r300_buffer_transfer_destroy(pipe, transfer); + rws->buffer_unmap(rbuf->buf); } -struct u_resource_vtbl r300_buffer_vtbl = +static const struct u_resource_vtbl r300_buffer_vtbl = { - u_default_resource_get_handle, /* get_handle */ + NULL, /* get_handle */ r300_buffer_destroy, /* resource_destroy */ - r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */ r300_buffer_get_transfer, /* get_transfer */ r300_buffer_transfer_destroy, /* transfer_destroy */ r300_buffer_transfer_map, /* transfer_map */ - r300_buffer_transfer_flush_region, /* transfer_flush_region */ + NULL, /* transfer_flush_region */ r300_buffer_transfer_unmap, /* transfer_unmap */ r300_buffer_transfer_inline_write /* transfer_inline_write */ }; @@ -314,73 +178,68 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf; + struct r300_resource *rbuf; unsigned alignment = 16; rbuf = util_slab_alloc(&r300screen->pool_buffers); - rbuf->magic = R300_BUFFER_MAGIC; - - rbuf->b.b = *templ; - rbuf->b.vtbl = &r300_buffer_vtbl; - pipe_reference_init(&rbuf->b.b.reference, 1); - rbuf->b.b.screen = screen; + rbuf->b.b.b = *templ; + rbuf->b.b.vtbl = &r300_buffer_vtbl; + pipe_reference_init(&rbuf->b.b.b.reference, 1); + rbuf->b.b.b.screen = screen; + rbuf->b.user_ptr = NULL; rbuf->domain = R300_DOMAIN_GTT; - rbuf->num_ranges = 0; rbuf->buf = NULL; + rbuf->buf_size = templ->width0; rbuf->constant_buffer = NULL; - rbuf->user_buffer = NULL; /* Alloc constant buffers in RAM. 
*/ if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) { rbuf->constant_buffer = MALLOC(templ->width0); - return &rbuf->b.b; + return &rbuf->b.b.b; } rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, - rbuf->b.b.width0, alignment, - rbuf->b.b.bind, rbuf->b.b.usage, + rbuf->b.b.b.width0, alignment, + rbuf->b.b.b.bind, rbuf->b.b.b.usage, rbuf->domain); - rbuf->cs_buf = - r300screen->rws->buffer_get_cs_handle(r300screen->rws, rbuf->buf); - if (!rbuf->buf) { util_slab_free(&r300screen->pool_buffers, rbuf); return NULL; } - return &rbuf->b.b; + rbuf->cs_buf = + r300screen->rws->buffer_get_cs_handle(rbuf->buf); + + return &rbuf->b.b.b; } struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, + void *ptr, unsigned size, unsigned bind) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf; + struct r300_resource *rbuf; rbuf = util_slab_alloc(&r300screen->pool_buffers); - rbuf->magic = R300_BUFFER_MAGIC; - - pipe_reference_init(&rbuf->b.b.reference, 1); - rbuf->b.vtbl = &r300_buffer_vtbl; - rbuf->b.b.screen = screen; - rbuf->b.b.target = PIPE_BUFFER; - rbuf->b.b.format = PIPE_FORMAT_R8_UNORM; - rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE; - rbuf->b.b.bind = bind; - rbuf->b.b.width0 = bytes; - rbuf->b.b.height0 = 1; - rbuf->b.b.depth0 = 1; - rbuf->b.b.array_size = 1; - rbuf->b.b.flags = 0; + pipe_reference_init(&rbuf->b.b.b.reference, 1); + rbuf->b.b.b.screen = screen; + rbuf->b.b.b.target = PIPE_BUFFER; + rbuf->b.b.b.format = PIPE_FORMAT_R8_UNORM; + rbuf->b.b.b.usage = PIPE_USAGE_IMMUTABLE; + rbuf->b.b.b.bind = bind; + rbuf->b.b.b.width0 = ~0; + rbuf->b.b.b.height0 = 1; + rbuf->b.b.b.depth0 = 1; + rbuf->b.b.b.array_size = 1; + rbuf->b.b.b.flags = 0; + rbuf->b.b.vtbl = &r300_buffer_vtbl; + rbuf->b.user_ptr = ptr; rbuf->domain = R300_DOMAIN_GTT; - rbuf->num_ranges = 0; rbuf->buf = NULL; + rbuf->buf_size = size; rbuf->constant_buffer = NULL; - rbuf->user_buffer = ptr; - return &rbuf->b.b; + return &rbuf->b.b.b; } diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 0b3555dd81..cdbc4425fc 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -35,53 +35,19 @@ #include "r300_winsys.h" #include "r300_context.h" -#define R300_BUFFER_MAGIC 0xabcd1234 -#define R300_BUFFER_MAX_RANGES 32 - -struct r300_buffer_range { - uint32_t start; - uint32_t end; -}; - -/* Vertex buffer. */ -struct r300_buffer -{ - struct u_resource b; - - uint32_t magic; - - struct r300_winsys_buffer *buf; - struct r300_winsys_cs_buffer *cs_buf; - - enum r300_buffer_domain domain; - - uint8_t *user_buffer; - uint8_t *constant_buffer; - struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES]; - unsigned num_ranges; -}; - /* Functions. 
*/ -int r300_upload_user_buffers(struct r300_context *r300); - -int r300_upload_index_buffer(struct r300_context *r300, - struct pipe_resource **index_buffer, - unsigned index_size, - unsigned start, - unsigned count, unsigned *out_offset); +void r300_upload_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned index_size, unsigned *start, + unsigned count, uint8_t *ptr); struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ); struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, - unsigned usage); - -unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf, - enum r300_reference_domain domain); + void *ptr, unsigned size, + unsigned bind); /* Inline functions. */ @@ -90,9 +56,4 @@ static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer) return (struct r300_buffer *)buffer; } -static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer) -{ - return r300_buffer(buffer)->user_buffer ? true : false; -} - #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 7529253240..ecb4fc691c 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -24,10 +24,12 @@ #include "draw/draw_context.h" #include "util/u_framebuffer.h" +#include "util/u_half.h" #include "util/u_math.h" #include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -44,7 +46,6 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" -#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -187,12 +188,16 @@ static void* r300_create_blend_state(struct pipe_context* pipe, struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */ + uint32_t blend_control_noclamp = 0; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */ + uint32_t alpha_blend_control_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */ uint32_t color_channel_mask = 0; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */ uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */ uint32_t dither = 0; /* R300_RB3D_DITHER_CTL: 0x4e50 */ CB_LOCALS; + blend->state = *state; + if (state->rt[0].blend_enable) { unsigned eqRGB = state->rt[0].rgb_func; @@ -205,10 +210,14 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, * this is just the crappy D3D naming */ - blend_control = R300_ALPHA_BLEND_ENABLE | - r300_translate_blend_function(eqRGB) | + blend_control = blend_control_noclamp = + R300_ALPHA_BLEND_ENABLE | ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); + blend_control |= + r300_translate_blend_function(eqRGB, TRUE); + blend_control_noclamp |= + r300_translate_blend_function(eqRGB, FALSE); /* Optimization: some operations do not require the destination color. * @@ -230,6 +239,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { /* Enable reading from the colorbuffer. 
*/ blend_control |= R300_READ_ENABLE; + blend_control_noclamp |= R300_READ_ENABLE; if (r300screen->caps.is_r500) { /* Optimization: Depending on incoming pixels, we can @@ -305,10 +315,14 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { blend_control |= R300_SEPARATE_ALPHA_ENABLE; - alpha_blend_control = - r300_translate_blend_function(eqA) | + blend_control_noclamp |= R300_SEPARATE_ALPHA_ENABLE; + alpha_blend_control = alpha_blend_control_noclamp = (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); + alpha_blend_control |= + r300_translate_blend_function(eqA, TRUE); + alpha_blend_control_noclamp |= + r300_translate_blend_function(eqA, FALSE); } } @@ -345,7 +359,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, */ /* Build a command buffer. */ - BEGIN_CB(blend->cb, 8); + BEGIN_CB(blend->cb_clamp, 8); OUT_CB_REG(R300_RB3D_ROPCNTL, rop); OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); OUT_CB(blend_control); @@ -354,6 +368,16 @@ static void* r300_create_blend_state(struct pipe_context* pipe, OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); END_CB; + /* Build a command buffer. */ + BEGIN_CB(blend->cb_noclamp, 8); + OUT_CB_REG(R300_RB3D_ROPCNTL, rop); + OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); + OUT_CB(blend_control_noclamp); + OUT_CB(alpha_blend_control_noclamp); + OUT_CB(color_channel_mask); + OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); + END_CB; + /* The same as above, but with no colorbuffer reads and writes. */ BEGIN_CB(blend->cb_no_readwrite, 8); OUT_CB_REG(R300_RB3D_ROPCNTL, rop); @@ -374,6 +398,10 @@ static void r300_bind_blend_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); UPDATE_STATE(state, r300->blend_state); + + if (r300->fs.state && r300_pick_fragment_shader(r300)) { + r300_mark_fs_code_dirty(r300); + } } /* Free blend state. */ @@ -395,22 +423,64 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); - struct r300_blend_color_state* state = + struct pipe_framebuffer_state *fb = r300->fb_state.state; + struct r300_blend_color_state *state = (struct r300_blend_color_state*)r300->blend_color_state.state; + struct pipe_blend_color c; + enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0; CB_LOCALS; + state->state = *color; /* Save it, so that we can reuse it in set_fb_state */ + c = *color; + + /* The blend color is dependent on the colorbuffer format. 
*/ + if (fb->nr_cbufs) { + switch (format) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + c.color[1] = c.color[0]; + break; + + case PIPE_FORMAT_A8_UNORM: + c.color[1] = c.color[3]; + break; + + case PIPE_FORMAT_R8G8_UNORM: + c.color[2] = c.color[1]; + break; + + case PIPE_FORMAT_L8A8_UNORM: + c.color[2] = c.color[3]; + break; + + default:; + } + } + if (r300->screen->caps.is_r500) { - /* XXX if FP16 blending is enabled, we should use the FP16 format */ BEGIN_CB(state->cb, 3); OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); - OUT_CB(float_to_fixed10(color->color[0]) | - (float_to_fixed10(color->color[3]) << 16)); - OUT_CB(float_to_fixed10(color->color[2]) | - (float_to_fixed10(color->color[1]) << 16)); + + switch (format) { + case PIPE_FORMAT_R16G16B16A16_FLOAT: + OUT_CB(util_float_to_half(c.color[2]) | + (util_float_to_half(c.color[3]) << 16)); + OUT_CB(util_float_to_half(c.color[0]) | + (util_float_to_half(c.color[1]) << 16)); + break; + + default: + OUT_CB(float_to_fixed10(c.color[0]) | + (float_to_fixed10(c.color[3]) << 16)); + OUT_CB(float_to_fixed10(c.color[2]) | + (float_to_fixed10(c.color[1]) << 16)); + } + END_CB; } else { union util_color uc; - util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + util_pack_color(c.color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); BEGIN_CB(state->cb, 2); OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui); @@ -442,8 +512,7 @@ static void r300_set_clip_state(struct pipe_context* pipe, OUT_CB_TABLE(state->ucp, state->nr * 4); } OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN | - (state->depth_clamp ? R300_CLIP_DISABLE : 0)); + R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CB; r300_mark_atom_dirty(r300, &r300->clip_state); @@ -538,29 +607,54 @@ static void* r300_translate_alpha_function(state->alpha.func) | R300_FG_ALPHA_FUNC_ENABLE; - /* We could use 10bit alpha ref but who needs that? 
*/ dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value); + dsa->alpha_value = util_float_to_half(state->alpha.ref_value); - if (caps->is_r500) + if (caps->is_r500) { + dsa->alpha_function_fp16 = dsa->alpha_function | + R500_FG_ALPHA_FUNC_FP16_ENABLE; dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT; + } } - BEGIN_CB(&dsa->cb_begin, 8); + BEGIN_CB(&dsa->cb_begin, 10); OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); OUT_CB(dsa->z_buffer_control); OUT_CB(dsa->z_stencil_control); OUT_CB(dsa->stencil_ref_mask); OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); + OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); END_CB; - BEGIN_CB(dsa->cb_no_readwrite, 8); + BEGIN_CB(&dsa->cb_begin_fp16, 10); + OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16); + OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); + OUT_CB(dsa->z_buffer_control); + OUT_CB(dsa->z_stencil_control); + OUT_CB(dsa->stencil_ref_mask); + OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); + OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); + END_CB; + + BEGIN_CB(dsa->cb_zb_no_readwrite, 10); OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); OUT_CB(0); OUT_CB(0); OUT_CB(0); OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0); + OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); + END_CB; + + BEGIN_CB(dsa->cb_fp16_zb_no_readwrite, 10); + OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16); + OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); + OUT_CB(0); + OUT_CB(0); + OUT_CB(0); + OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0); + OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); END_CB; return (void*)dsa; @@ -617,21 +711,16 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, } static void r300_tex_set_tiling_flags(struct r300_context *r300, - struct r300_texture *tex, unsigned level) + struct r300_resource *tex, + unsigned level) { /* Check if the macrotile flag needs to be changed. * Skip changing the flags otherwise. */ - if (tex->desc.macrotile[tex->surface_level] != - tex->desc.macrotile[level]) { - /* Tiling determines how DRM treats the buffer data. - * We must flush CS when changing it if the buffer is referenced. */ - if (r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_CS)) - r300->context.flush(&r300->context, 0, NULL); - - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->desc.microtile, tex->desc.macrotile[level], - tex->desc.stride_in_bytes[0]); + if (tex->tex.macrotile[tex->surface_level] != + tex->tex.macrotile[level]) { + r300->rws->buffer_set_tiling(tex->buf, r300->cs, + tex->tex.microtile, tex->tex.macrotile[level], + tex->tex.stride_in_bytes[0]); tex->surface_level = level; } @@ -646,12 +735,12 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300, /* Set tiling flags for new surfaces. 
*/ for (i = 0; i < state->nr_cbufs; i++) { r300_tex_set_tiling_flags(r300, - r300_texture(state->cbufs[i]->texture), + r300_resource(state->cbufs[i]->texture), state->cbufs[i]->u.tex.level); } if (state->zsbuf) { r300_tex_set_tiling_flags(r300, - r300_texture(state->zsbuf->texture), + r300_resource(state->zsbuf->texture), state->zsbuf->u.tex.level); } } @@ -660,7 +749,7 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, const char *binding) { struct pipe_resource *tex = surf->texture; - struct r300_texture *rtex = r300_texture(tex); + struct r300_resource *rtex = r300_resource(tex); fprintf(stderr, "r300: %s[%i] Dim: %ix%i, Firstlayer: %i, " @@ -673,9 +762,9 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level, util_format_short_name(surf->format), - rtex->desc.macrotile[0] ? "YES" : " NO", - rtex->desc.microtile ? "YES" : " NO", - rtex->desc.stride_in_pixels[0], + rtex->tex.macrotile[0] ? "YES" : " NO", + rtex->tex.microtile ? "YES" : " NO", + rtex->tex.stride_in_pixels[0], tex->width0, tex->height0, tex->depth0, tex->last_level, util_format_short_name(tex->format)); } @@ -686,13 +775,23 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, struct pipe_framebuffer_state *state = r300->fb_state.state; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); - /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300_mark_atom_dirty(r300, &r300->gpu_flush); r300_mark_atom_dirty(r300, &r300->fb_state); - r300_mark_atom_dirty(r300, &r300->hyperz_state); + /* What is marked as dirty depends on the enum r300_fb_state_change. */ if (change == R300_CHANGED_FB_STATE) { r300_mark_atom_dirty(r300, &r300->aa_state); + r300_mark_atom_dirty(r300, &r300->dsa_state); /* for AlphaRef */ + r300_set_blend_color(&r300->context, r300->blend_color_state.state); + } + + if (change == R300_CHANGED_FB_STATE || + change == R300_CHANGED_HYPERZ_FLAG) { + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + + if (change == R300_CHANGED_FB_STATE || + change == R300_CHANGED_MULTIWRITE) { r300_mark_atom_dirty(r300, &r300->fb_state_pipelined); } @@ -704,23 +803,21 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, else if (state->zsbuf) { r300->fb_state.size += 10; if (can_hyperz) - r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + r300->fb_state.size += 8; } /* The size of the rest of atoms stays the same. */ } static void - r300_set_framebuffer_state(struct pipe_context* pipe, - const struct pipe_framebuffer_state* state) +r300_set_framebuffer_state(struct pipe_context* pipe, + const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; - int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -736,10 +833,37 @@ static void return; } - /* If nr_cbufs is changed from zero to non-zero or vice versa... */ - if (!!old_state->nr_cbufs != !!state->nr_cbufs) { - r300_mark_atom_dirty(r300, &r300->blend_state); + if (old_state->zsbuf && r300->zmask_in_use && !r300->hyperz_locked) { + /* There is a zmask in use, what are we gonna do? 
*/ + if (state->zsbuf) { + if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { + /* Decompress the currently bound zbuffer before we bind another one. */ + r300_decompress_zmask(r300); + r300->hiz_in_use = FALSE; + } + } else { + /* We don't bind another zbuffer, so lock the current one. */ + r300->hyperz_locked = TRUE; + pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); + } + } else if (r300->hyperz_locked && r300->locked_zbuffer) { + /* We have a locked zbuffer now, what are we gonna do? */ + if (state->zsbuf) { + if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { + /* We are binding some other zbuffer, so decompress the locked one, + * it gets unlocked automatically. */ + r300_decompress_zmask_locked_unsafe(r300); + r300->hiz_in_use = FALSE; + } else { + /* We are binding the locked zbuffer again, so unlock it. */ + r300->hyperz_locked = FALSE; + } + } } + + /* Need to reset clamping or colormask. */ + r300_mark_atom_dirty(r300, &r300->blend_state); + /* If zsbuf is set from NULL to non-NULL or vice versa.. */ if (!!old_state->zsbuf != !!state->zsbuf) { r300_mark_atom_dirty(r300, &r300->dsa_state); @@ -750,14 +874,14 @@ static void util_copy_framebuffer_state(r300->fb_state.state, state); + if (!r300->hyperz_locked) { + pipe_surface_reference(&r300->locked_zbuffer, NULL); + } + r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - r300->validate_buffers = TRUE; - r300->z_compression = false; - if (state->zsbuf) { - blocksize = util_format_get_blocksize(state->zsbuf->texture->format); - switch (blocksize) { + switch (util_format_get_blocksize(state->zsbuf->texture->format)) { case 2: zbuffer_bpp = 16; break; @@ -765,31 +889,6 @@ static void zbuffer_bpp = 24; break; } - if (can_hyperz) { - struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_texture *tex; - int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; - int level = zs_surf->base.u.tex.level; - - tex = r300_texture(zs_surf->base.texture); - - /* work out whether we can support hiz on this buffer */ - r300_hiz_alloc_block(r300, zs_surf); - - /* work out whether we can support zmask features on this buffer */ - r300_zmask_alloc_block(r300, zs_surf, compress); - - if (tex->zmask_mem[level]) { - /* compression causes hangs on 16-bit */ - if (zbuffer_bpp == 24) - r300->z_compression = compress; - } - DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, - tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, - r300->z_compression, tex->zmask_mem[level] ? 1 : 0, - tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); - } /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { @@ -801,27 +900,25 @@ static void } /* Set up AA config. 
*/ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { - aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; - - switch (state->cbufs[0]->texture->nr_samples) { - case 2: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; - break; - case 3: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; - break; - case 4: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; - break; - case 6: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; - break; - } - } else { - aa->aa_config = 0; + if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + + switch (state->cbufs[0]->texture->nr_samples) { + case 2: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; + break; + case 3: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + break; + case 4: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + break; + case 6: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + break; } + } else { + aa->aa_config = 0; } if (DBG_ON(r300, DBG_FB)) { @@ -876,16 +973,25 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; + struct pipe_framebuffer_state *fb = r300->fb_state.state; + boolean last_multi_write; if (fs == NULL) { r300->fs.state = NULL; return; } + last_multi_write = r300_fragment_shader_writes_all(r300_fs(r300)); + r300->fs.state = fs; r300_pick_fragment_shader(r300); r300_mark_fs_code_dirty(r300); + if (fb->nr_cbufs > 1 && + last_multi_write != r300_fragment_shader_writes_all(fs)) { + r300_mark_fb_state_dirty(r300, R300_CHANGED_MULTIWRITE); + } + r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */ } @@ -934,12 +1040,14 @@ static void* r300_create_rs_state(struct pipe_context* pipe, uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ + uint32_t round_mode; /* R300_GA_ROUND_MODE: 0x428c */ /* Point sprites texture coordinates, 0: lower left, 1: upper right */ float point_texcoord_left = 0; /* R300_GA_POINT_S0: 0x4200 */ float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */ float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */ float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */ + boolean vclamp = TRUE; CB_LOCALS; /* Copy rasterizer state. */ @@ -1062,6 +1170,12 @@ static void* r300_create_rs_state(struct pipe_context* pipe, } } + /* Vertex color clamping. FP20 means no clamping. */ + round_mode = + R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST | + (!vclamp ? (R300_GA_ROUND_MODE_RGB_CLAMP_FP20 | + R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20) : 0); + /* Build the main command buffer. 
*/ BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE); OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status); @@ -1076,6 +1190,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config); OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value); OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode); + OUT_CB_REG(R300_GA_ROUND_MODE, round_mode); OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule); OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4); OUT_CB_32F(point_texcoord_left); @@ -1282,7 +1397,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_textures_state* state = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture *texture; + struct r300_resource *texture; unsigned i, real_num_views = 0, view_index = 0; unsigned tex_units = r300->screen->caps.num_tex_units; boolean dirty_tex = FALSE; @@ -1298,29 +1413,27 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, } for (i = 0; i < count; i++) { - if (&state->sampler_views[i]->base != views[i]) { - pipe_sampler_view_reference( - (struct pipe_sampler_view**)&state->sampler_views[i], - views[i]); + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&state->sampler_views[i], + views[i]); - if (!views[i]) { - continue; - } + if (!views[i]) { + continue; + } - /* A new sampler view (= texture)... */ - dirty_tex = TRUE; + /* A new sampler view (= texture)... */ + dirty_tex = TRUE; - /* Set the texrect factor in the fragment shader. + /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. */ - texture = r300_texture(views[i]->texture); - if (texture->desc.is_npot) { - r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); - } + texture = r300_resource(views[i]->texture); + if (texture->tex.is_npot) { + r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); + } - state->sampler_views[i]->texcache_region = + state->sampler_views[i]->texcache_region = r300_assign_texture_cache_region(view_index, real_num_views); - view_index++; - } + view_index++; } for (i = count; i < tex_units; i++) { @@ -1334,7 +1447,6 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, state->sampler_view_count = count; r300_mark_atom_dirty(r300, &r300->textures_state); - r300->validate_buffers = TRUE; if (dirty_tex) { r300_mark_atom_dirty(r300, &r300->texture_cache_inval); @@ -1347,7 +1459,7 @@ r300_create_sampler_view(struct pipe_context *pipe, const struct pipe_sampler_view *templ) { struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); - struct r300_texture *tex = r300_texture(texture); + struct r300_resource *tex = r300_resource(texture); boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle; @@ -1449,88 +1561,30 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); - struct pipe_vertex_buffer *vbo; - unsigned i, max_index = (1 << 24) - 1; - boolean any_user_buffer = FALSE; + unsigned i; struct pipe_vertex_buffer dummy_vb = {0}; /* There must be at least one vertex buffer set, otherwise it locks up. 
*/ if (!count) { dummy_vb.buffer = r300->dummy_vb; - dummy_vb.max_index = r300->dummy_vb->width0 / 4; buffers = &dummy_vb; count = 1; } - if (count == r300->vertex_buffer_count && - memcmp(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count) == 0) { - return; - } + u_vbuf_mgr_set_vertex_buffers(r300->vbuf_mgr, count, buffers); if (r300->screen->caps.has_tcl) { /* HW TCL. */ - r300->incompatible_vb_layout = FALSE; - - /* Check if the strides and offsets are aligned to the size of DWORD. */ for (i = 0; i < count; i++) { - if (buffers[i].buffer) { - if (buffers[i].stride % 4 != 0 || - buffers[i].buffer_offset % 4 != 0) { - r300->incompatible_vb_layout = TRUE; - break; - } + if (buffers[i].buffer && + !r300_resource(buffers[i].buffer)->b.user_ptr) { } } - - for (i = 0; i < count; i++) { - /* Why, yes, I AM casting away constness. How did you know? */ - vbo = (struct pipe_vertex_buffer*)&buffers[i]; - - /* Skip NULL buffers */ - if (!buffers[i].buffer) { - continue; - } - - if (r300_buffer_is_user_buffer(vbo->buffer)) { - any_user_buffer = TRUE; - } - - if (vbo->max_index == ~0) { - /* if no VBO stride then only one vertex value so max index is 1 */ - /* should think about converting to VS constants like svga does */ - if (!vbo->stride) - vbo->max_index = 1; - else - vbo->max_index = - (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; - } - - max_index = MIN2(vbo->max_index, max_index); - } - - r300->any_user_vbs = any_user_buffer; - r300->vertex_buffer_max_index = max_index; - r300->aos_dirty = TRUE; - r300->validate_buffers = TRUE; + r300->vertex_arrays_dirty = TRUE; } else { /* SW TCL. */ draw_set_vertex_buffers(r300->draw, count, buffers); } - - /* Common code. */ - for (i = 0; i < count; i++) { - /* Reference our buffer. */ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer); - } - for (; i < r300->vertex_buffer_count; i++) { - /* Dereference any old buffers. */ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - } - - memcpy(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count); - r300->vertex_buffer_count = count; } static void r300_set_index_buffer(struct pipe_context* pipe, @@ -1538,19 +1592,19 @@ static void r300_set_index_buffer(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - if (ib) { + if (ib && ib->buffer) { + assert(ib->offset % ib->index_size == 0); + pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); + r300->index_buffer.offset /= r300->index_buffer.index_size; } else { pipe_resource_reference(&r300->index_buffer.buffer, NULL); memset(&r300->index_buffer, 0, sizeof(r300->index_buffer)); } - if (r300->screen->caps.has_tcl) { - r300->validate_buffers = TRUE; - } - else { + if (!r300->screen->caps.has_tcl) { draw_set_index_buffer(r300->draw, ib); } } @@ -1563,17 +1617,11 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) enum pipe_format format; unsigned i; - if (velems->count > 16) { - fprintf(stderr, "r300: More than 16 vertex elements are not supported," - " requested %i, using 16.\n", velems->count); - velems->count = 16; - } - /* Vertex shaders have no semantics on their inputs, * so PSC should just route stuff based on the vertex elements, * and not on attrib information. 
*/ for (i = 0; i < velems->count; i++) { - format = velems->hw_format[i]; + format = velems->velem[i].src_format; type = r300_translate_vertex_data_type(format); if (type == R300_INVALID_FORMAT) { @@ -1605,16 +1653,13 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) vstream->count = (i >> 1) + 1; } -#define FORMAT_REPLACE(what, withwhat) \ - case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break - static void* r300_create_vertex_elements_state(struct pipe_context* pipe, unsigned count, const struct pipe_vertex_element* attribs) { + struct r300_context *r300 = r300_context(pipe); struct r300_vertex_element_state *velems; unsigned i; - enum pipe_format *format; struct pipe_vertex_element dummy_attrib = {0}; /* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */ @@ -1622,81 +1667,33 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, dummy_attrib.src_format = PIPE_FORMAT_R8G8B8A8_UNORM; attribs = &dummy_attrib; count = 1; + } else if (count > 16) { + fprintf(stderr, "r300: More than 16 vertex elements are not supported," + " requested %i, using 16.\n", count); + count = 16; } - assert(count <= PIPE_MAX_ATTRIBS); velems = CALLOC_STRUCT(r300_vertex_element_state); - if (velems != NULL) { - velems->count = count; - memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); - - if (r300_screen(pipe->screen)->caps.has_tcl) { - /* Set the best hw format in case the original format is not - * supported by hw. */ - for (i = 0; i < count; i++) { - velems->hw_format[i] = velems->velem[i].src_format; - format = &velems->hw_format[i]; - - /* This is basically the list of unsupported formats. - * For now we don't care about the alignment, that's going to - * be sorted out after the PSC setup. */ - switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_UNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_USCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SSCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_FIXED, R32_FLOAT); - FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT); - - default:; - } + if (!velems) + return NULL; - velems->incompatible_layout = - velems->incompatible_layout || - velems->velem[i].src_format != velems->hw_format[i] || - velems->velem[i].src_offset % 4 != 0; - } + velems->count = count; + velems->vmgr_elements = + u_vbuf_mgr_create_vertex_elements(r300->vbuf_mgr, count, attribs, + velems->velem); - /* Now setup PSC. - * The unused components will be replaced by (..., 0, 1). */ - r300_vertex_psc(velems); - - /* Align the formats to the size of DWORD. 
- * We only care about the blocksizes of the formats since - * swizzles are already set up. - * Also compute the vertex size. */ - for (i = 0; i < count; i++) { - /* This is OK because we check for aligned strides too - * elsewhere. */ - velems->hw_format_size[i] = - align(util_format_get_blocksize(velems->hw_format[i]), 4); - velems->vertex_size_dwords += velems->hw_format_size[i] / 4; - } + if (r300_screen(pipe->screen)->caps.has_tcl) { + /* Setup PSC. + * The unused components will be replaced by (..., 0, 1). */ + r300_vertex_psc(velems); + + for (i = 0; i < count; i++) { + velems->format_size[i] = + align(util_format_get_blocksize(velems->velem[i].src_format), 4); + velems->vertex_size_dwords += velems->format_size[i] / 4; } } + return velems; } @@ -1712,6 +1709,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, r300->velems = velems; + u_vbuf_mgr_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements); + if (r300->draw) { draw_set_vertex_elements(r300->draw, velems->count, velems->velem); return; @@ -1719,12 +1718,16 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state); r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2; - r300->aos_dirty = TRUE; + r300->vertex_arrays_dirty = TRUE; } static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state) { - FREE(state); + struct r300_context *r300 = r300_context(pipe); + struct r300_vertex_element_state *velems = state; + + u_vbuf_mgr_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements); + FREE(state); } static void* r300_create_vs_state(struct pipe_context* pipe, @@ -1811,6 +1814,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_constant_buffer *cbuf; + struct r300_resource *rbuf = r300_resource(buf); uint32_t *mapped; switch (shader) { @@ -1824,14 +1828,18 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, return; } - if (buf == NULL || buf->width0 == 0 || - (mapped = (uint32_t*)r300_buffer(buf)->constant_buffer) == NULL) { + if (buf == NULL || buf->width0 == 0) + return; + + if (rbuf->b.user_ptr) + mapped = (uint32_t*)rbuf->b.user_ptr; + else if (rbuf->constant_buffer) + mapped = (uint32_t*)rbuf->constant_buffer; + else return; - } if (shader == PIPE_SHADER_FRAGMENT || (shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) { - assert((buf->width0 % (4 * sizeof(float))) == 0); cbuf->ptr = mapped; } @@ -1862,6 +1870,14 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, } } +static void r300_texture_barrier(struct pipe_context *pipe) +{ + struct r300_context *r300 = r300_context(pipe); + + r300_mark_atom_dirty(r300, &r300->gpu_flush); + r300_mark_atom_dirty(r300, &r300->texture_cache_inval); +} + void r300_init_state_functions(struct r300_context* r300) { r300->context.create_blend_state = r300_create_blend_state; @@ -1908,6 +1924,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_vertex_buffers = r300_set_vertex_buffers; r300->context.set_index_buffer = r300_set_index_buffer; + r300->context.redefine_user_buffer = u_default_redefine_user_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; @@ -1916,4 +1933,6 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.create_vs_state = 
r300_create_vs_state; r300->context.bind_vs_state = r300_bind_vs_state; r300->context.delete_vs_state = r300_delete_vs_state; + + r300->context.texture_barrier = r300_texture_barrier; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index d5fc8ece25..ec00e2552c 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -29,10 +29,8 @@ #include "r300_context.h" #include "r300_fs.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" -#include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_texture.h" #include "r300_vs.h" @@ -490,7 +488,8 @@ static void r300_update_rs_block(struct r300_context *r300) for (; i < ATTR_GENERIC_COUNT; i++) { if (fs_inputs->generic[i] != ATTR_UNUSED) { fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, " - "not enough hardware slots.\n", i); + "not enough hardware slots (it's not a bug, do not " + "report it).\n", i); } } @@ -525,7 +524,8 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n"); } else { fprintf(stderr, "r300: ERROR: FS input fog unassigned, " - "not enough hardware slots.\n"); + "not enough hardware slots. (it's not a bug, " + "do not report it)\n"); } } } @@ -552,7 +552,8 @@ static void r300_update_rs_block(struct r300_context *r300) } else { if (fs_inputs->wpos != ATTR_UNUSED && tex_count >= 8) { fprintf(stderr, "r300: ERROR: FS input WPOS unassigned, " - "not enough hardware slots.\n"); + "not enough hardware slots. (it's not a bug, do not " + "report it)\n"); } } @@ -640,11 +641,36 @@ static uint32_t r300_get_border_color(enum pipe_format format, /* Compressed formats. */ if (util_format_is_compressed(format)) { - util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); - return uc.ui; + switch (format) { + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + /* Add 1/32 to round the border color instead of truncating. */ + /* The Y component is used for the border color. 
*/ + border_swizzled[1] = border_swizzled[2] + 1.0f/32; + util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_SNORM: + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_LATC2_UNORM: + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + return uc.ui; + default: + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + return uc.ui; + } } switch (desc->channel[0].size) { + case 2: + util_pack_color(border_swizzled, PIPE_FORMAT_B2G3R3_UNORM, &uc); + break; + case 4: util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); break; @@ -671,7 +697,20 @@ static uint32_t r300_get_border_color(enum pipe_format format, case 16: if (desc->nr_channels <= 2) { border_swizzled[0] = border_swizzled[2]; - util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_FLOAT, &uc); + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); + } + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + } + break; + + case 32: + if (desc->nr_channels == 1) { + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R32_FLOAT, &uc); } else { util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); } @@ -681,6 +720,25 @@ static uint32_t r300_get_border_color(enum pipe_format format, return uc.ui; } +static boolean util_format_is_float(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + + if (!format) + return FALSE; + + /* Find the first non-void channel. */ + for (i = 0; i < 4; i++) + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + + if (i == 4) + return FALSE; + + return desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT ? TRUE : FALSE; +} + static void r300_merge_textures_and_samplers(struct r300_context* r300) { struct r300_textures_state *state = @@ -688,7 +746,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_texture_sampler_state *texstate; struct r300_sampler_state *sampler; struct r300_sampler_view *view; - struct r300_texture *tex; + struct r300_resource *tex; unsigned min_level, max_level, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); @@ -706,7 +764,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) state->tx_enable |= 1 << i; view = state->sampler_views[i]; - tex = r300_texture(view->base.texture); + tex = r300_resource(view->base.texture); sampler = state->sampler_states[i]; texstate = &state->regs[i]; @@ -722,32 +780,37 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* determine min/max levels */ max_level = MIN3(sampler->max_lod + view->base.u.tex.first_level, - tex->desc.b.b.last_level, view->base.u.tex.last_level); + tex->b.b.b.last_level, view->base.u.tex.last_level); min_level = MIN2(sampler->min_lod + view->base.u.tex.first_level, max_level); - if (tex->desc.is_npot && min_level > 0) { + if (tex->tex.is_npot && min_level > 0) { /* Even though we do not implement mipmapping for NPOT * textures, we should at least honor the minimum level * which is allowed to be displayed. 
We do this by setting up - * an i-th mipmap level as the zero level. */ - r300_texture_setup_format_state(r300->screen, &tex->desc, + * the i-th mipmap level as the zero level. */ + unsigned offset = tex->tex_offset + + tex->tex.offset_in_bytes[min_level]; + + r300_texture_setup_format_state(r300->screen, tex, min_level, &texstate->format); - texstate->format.tile_config |= - tex->desc.offset_in_bytes[min_level] & 0xffffffe0; - assert((tex->desc.offset_in_bytes[min_level] & 0x1f) == 0); + texstate->format.tile_config |= offset & 0xffffffe0; + assert((offset & 0x1f) == 0); + } else { + texstate->format.tile_config |= tex->tex_offset & 0xffffffe0; + assert((tex->tex_offset & 0x1f) == 0); } /* Assign a texture cache region. */ texstate->format.format1 |= view->texcache_region; /* Depth textures are kinda special. */ - if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + if (util_format_is_depth_or_stencil(tex->b.b.b.format)) { unsigned char depth_swizzle[4]; if (!r300->screen->caps.is_r500 && - util_format_get_blocksizebits(tex->desc.b.b.format) == 32) { + util_format_get_blocksizebits(tex->b.b.b.format) == 32) { /* X24x8 is sampled as Y16X16 on r3xx-r4xx. * The depth here is at the Y component. */ for (j = 0; j < 4; j++) @@ -772,17 +835,17 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } if (r300->screen->caps.dxtc_swizzle && - util_format_is_compressed(tex->desc.b.b.format)) { + util_format_is_compressed(tex->b.b.b.format)) { texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE; } /* to emulate 1D textures through 2D ones correctly */ - if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { + if (tex->b.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->desc.is_npot) { + if (tex->tex.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; @@ -811,6 +874,32 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); } + /* Float textures only support nearest and mip-nearest filtering. */ + if (util_format_is_float(tex->b.b.b.format)) { + /* No MAG linear filtering. */ + if ((texstate->filter0 & R300_TX_MAG_FILTER_MASK) == + R300_TX_MAG_FILTER_LINEAR) { + texstate->filter0 &= ~R300_TX_MAG_FILTER_MASK; + texstate->filter0 |= R300_TX_MAG_FILTER_NEAREST; + } + /* No MIN linear filtering. */ + if ((texstate->filter0 & R300_TX_MIN_FILTER_MASK) == + R300_TX_MIN_FILTER_LINEAR) { + texstate->filter0 &= ~R300_TX_MIN_FILTER_MASK; + texstate->filter0 |= R300_TX_MIN_FILTER_NEAREST; + } + /* No mipmap linear filtering. */ + if ((texstate->filter0 & R300_TX_MIN_FILTER_MIP_MASK) == + R300_TX_MIN_FILTER_MIP_LINEAR) { + texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; + texstate->filter0 |= R300_TX_MIN_FILTER_MIP_NEAREST; + } + /* No anisotropic filtering. */ + texstate->filter0 &= ~R300_TX_MAX_ANISO_MASK; + texstate->filter1 &= ~R500_TX_MAX_ANISO_MASK; + texstate->filter1 &= ~R500_TX_ANISO_HIGH_QUALITY; + } + texstate->filter0 |= i << 28; size += 16; @@ -859,44 +948,35 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } -/* We can't use compressed zbuffers as samplers. 
*/ -static void r300_flush_depth_textures(struct r300_context *r300) +static void r300_decompress_depth_textures(struct r300_context *r300) { struct r300_textures_state *state = (struct r300_textures_state*)r300->textures_state.state; - unsigned i, level; + struct pipe_resource *tex; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); + unsigned i; - if (r300->z_decomp_rd) + if (!r300->hyperz_locked || !r300->locked_zbuffer) { return; + } - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { if (state->sampler_views[i] && state->sampler_states[i]) { - struct pipe_resource *tex = state->sampler_views[i]->base.texture; - - if (tex->target == PIPE_TEXTURE_3D || - tex->target == PIPE_TEXTURE_CUBE) - continue; - - /* Ignore non-depth textures. - * Also ignore reinterpreted depth textures, e.g. resource_copy. */ - if (!util_format_is_depth_or_stencil(tex->format)) - continue; - - for (level = 0; level <= tex->last_level; level++) - if (r300_texture(tex)->zmask_in_use[level]) { - /* We don't handle 3D textures and cubemaps yet. */ - r300_flush_depth_stencil(&r300->context, tex, level, 0); - } + tex = state->sampler_views[i]->base.texture; + + if (tex == r300->locked_zbuffer->texture) { + r300_decompress_zmask_locked(r300); + return; + } } + } } void r300_update_derived_state(struct r300_context* r300) { - r300_flush_depth_textures(r300); - if (r300->textures_state.dirty) { + r300_decompress_depth_textures(r300); r300_merge_textures_and_samplers(r300); } diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h deleted file mode 100644 index 71a4a47b00..0000000000 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef R300_STATE_DERIVED_H -#define R300_STATE_DERIVED_H - -struct r300_context; - -void r300_update_derived_state(struct r300_context* r300); - -#endif /* R300_STATE_DERIVED_H */ diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 7e501221b1..54dae1acd9 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -25,13 +25,9 @@ #define R300_STATE_INLINES_H #include "draw/draw_vertex.h" - #include "pipe/p_format.h" - #include "util/u_format.h" - #include "r300_reg.h" - #include <stdio.h> /* Some maths. These should probably find their way to u_math, if needed. */ @@ -42,23 +38,24 @@ static INLINE int pack_float_16_6x(float f) { /* Blend state. */ -static INLINE uint32_t r300_translate_blend_function(int blend_func) +static INLINE uint32_t r300_translate_blend_function(int blend_func, + boolean clamp) { switch (blend_func) { - case PIPE_BLEND_ADD: - return R300_COMB_FCN_ADD_CLAMP; - case PIPE_BLEND_SUBTRACT: - return R300_COMB_FCN_SUB_CLAMP; - case PIPE_BLEND_REVERSE_SUBTRACT: - return R300_COMB_FCN_RSUB_CLAMP; - case PIPE_BLEND_MIN: - return R300_COMB_FCN_MIN; - case PIPE_BLEND_MAX: - return R300_COMB_FCN_MAX; - default: - fprintf(stderr, "r300: Unknown blend function %d\n", blend_func); - assert(0); - break; + case PIPE_BLEND_ADD: + return clamp ? R300_COMB_FCN_ADD_CLAMP : R300_COMB_FCN_ADD_NOCLAMP; + case PIPE_BLEND_SUBTRACT: + return clamp ? R300_COMB_FCN_SUB_CLAMP : R300_COMB_FCN_SUB_NOCLAMP; + case PIPE_BLEND_REVERSE_SUBTRACT: + return clamp ? R300_COMB_FCN_RSUB_CLAMP : R300_COMB_FCN_RSUB_NOCLAMP; + case PIPE_BLEND_MIN: + return R300_COMB_FCN_MIN; + case PIPE_BLEND_MAX: + return R300_COMB_FCN_MAX; + default: + fprintf(stderr, "r300: Unknown blend function %d\n", blend_func); + assert(0); + break; } return 0; } @@ -341,24 +338,6 @@ static INLINE uint32_t r500_anisotropy(unsigned max_aniso) R500_TX_ANISO_HIGH_QUALITY; } -/* Non-CSO state. (For now.) */ - -static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) -{ - switch (pipe_count) { - case 1: - return R300_GB_TILE_PIPE_COUNT_RV300; - case 2: - return R300_GB_TILE_PIPE_COUNT_R300; - case 3: - return R300_GB_TILE_PIPE_COUNT_R420_3P; - case 4: - return R300_GB_TILE_PIPE_COUNT_R420; - } - return 0; -} - - /* Translate pipe_formats into PSC vertex types. */ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 70fc5d96d8..c650fb7ed3 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -171,8 +171,18 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, - util_format_is_compressed(format) && dxtc_swizzle); + if (util_format_is_compressed(format) && + dxtc_swizzle && + format != PIPE_FORMAT_RGTC2_UNORM && + format != PIPE_FORMAT_RGTC2_SNORM && + format != PIPE_FORMAT_LATC2_UNORM && + format != PIPE_FORMAT_LATC2_SNORM) { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + TRUE); + } else { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + FALSE); + } /* S3TC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { @@ -197,10 +207,25 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - /* Add sign. 
*/ - for (i = 0; i < desc->nr_channels; i++) { - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - result |= sign_bit[i]; + /* RGTC formats. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + switch (format) { + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_SNORM: + result |= sign_bit[1]; + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_RGTC1_UNORM: + return R500_TX_FORMAT_ATI1N | result; + + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_SNORM: + result |= sign_bit[2] | sign_bit[3]; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_LATC2_UNORM: + return R400_TX_FORMAT_ATI2N | result; + + default: + return ~0; /* Unsupported/unknown. */ } } @@ -211,17 +236,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_CxV8U8 | result; } - /* RGTC formats. */ - if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { - switch (format) { - case PIPE_FORMAT_RGTC1_UNORM: - case PIPE_FORMAT_RGTC1_SNORM: - return R500_TX_FORMAT_ATI1N | result; - case PIPE_FORMAT_RGTC2_UNORM: - case PIPE_FORMAT_RGTC2_SNORM: - return R400_TX_FORMAT_ATI2N | result; - default: - return ~0; /* Unsupported/unknown. */ + /* Add sign. */ + for (i = 0; i < desc->nr_channels; i++) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= sign_bit[i]; } } @@ -244,6 +262,11 @@ uint32_t r300_translate_texformat(enum pipe_format format, desc->channel[2].size == 6) { return R300_TX_FORMAT_Z6Y5X5 | result; } + if (desc->channel[0].size == 2 && + desc->channel[1].size == 3 && + desc->channel[2].size == 3) { + return R300_TX_FORMAT_Z3Y3X2 | result; + } return ~0; /* Unsupported/unknown. */ case 4: @@ -348,6 +371,8 @@ uint32_t r500_tx_format_msb_bit(enum pipe_format format) switch (format) { case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return R500_TXFORMAT_MSB; @@ -365,14 +390,18 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) switch (format) { /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: + /*case PIPE_FORMAT_A8_SNORM:*/ case PIPE_FORMAT_I8_UNORM: + /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: + /*case PIPE_FORMAT_L8_SNORM:*/ case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return R300_COLOR_FORMAT_I8; /* 16-bit buffers. */ case PIPE_FORMAT_L8A8_UNORM: + /*case PIPE_FORMAT_L8A8_SNORM:*/ case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: return R300_COLOR_FORMAT_UV88; @@ -390,13 +419,21 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 32-bit buffers. 
*/ case PIPE_FORMAT_B8G8R8A8_UNORM: + /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ case PIPE_FORMAT_B8G8R8X8_UNORM: + /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ case PIPE_FORMAT_A8R8G8B8_UNORM: + /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ case PIPE_FORMAT_X8R8G8B8_UNORM: + /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ case PIPE_FORMAT_A8B8G8R8_UNORM: + /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: case PIPE_FORMAT_X8B8G8R8_UNORM: + /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ case PIPE_FORMAT_R8G8B8X8_UNORM: + /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ case PIPE_FORMAT_R8SG8SB8UX8U_NORM: return R300_COLOR_FORMAT_ARGB8888; @@ -481,6 +518,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) } else { if (desc->channel[i].size == 16) { modifier |= R300_US_OUT_FMT_C4_16; + } else if (desc->channel[i].size == 10) { + modifier |= R300_US_OUT_FMT_C4_10; } else { /* C4_8 seems to be used for the formats whose pixel size * is <= 32 bits. */ @@ -499,9 +538,12 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* 8-bit outputs, one channel. * COLORFORMAT_I8 stores the C2 component. */ case PIPE_FORMAT_A8_UNORM: + /*case PIPE_FORMAT_A8_SNORM:*/ return modifier | R300_C2_SEL_A; case PIPE_FORMAT_I8_UNORM: + /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: + /*case PIPE_FORMAT_L8_SNORM:*/ case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return modifier | R300_C2_SEL_R; @@ -509,6 +551,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* 16-bit outputs, two channels. * COLORFORMAT_UV88 stores C2 and C0. */ case PIPE_FORMAT_L8A8_UNORM: + /*case PIPE_FORMAT_L8A8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C2_SEL_R; case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: @@ -521,7 +564,9 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: + /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ case PIPE_FORMAT_B8G8R8X8_UNORM: + /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ case PIPE_FORMAT_B10G10R10A2_UNORM: return modifier | R300_C0_SEL_B | R300_C1_SEL_G | @@ -529,20 +574,26 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* ARGB outputs. */ case PIPE_FORMAT_A8R8G8B8_UNORM: + /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ case PIPE_FORMAT_X8R8G8B8_UNORM: + /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; /* ABGR outputs. */ case PIPE_FORMAT_A8B8G8R8_UNORM: + /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ case PIPE_FORMAT_X8B8G8R8_UNORM: + /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C1_SEL_B | R300_C2_SEL_G | R300_C3_SEL_R; /* RGBA outputs. */ case PIPE_FORMAT_R8G8B8X8_UNORM: + /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: case PIPE_FORMAT_R8SG8SB8UX8U_NORM: case PIPE_FORMAT_R10G10B10A2_UNORM: @@ -578,11 +629,12 @@ boolean r300_is_sampler_format_supported(enum pipe_format format) } void r300_texture_setup_format_state(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level, struct r300_texture_format_state *out) { - struct pipe_resource *pt = &desc->b.b; + struct pipe_resource *pt = &tex->b.b.b; + struct r300_texture_desc *desc = &tex->tex; boolean is_r500 = screen->caps.is_r500; /* Mask out all the fields we change. 
*/ @@ -625,163 +677,147 @@ void r300_texture_setup_format_state(struct r300_screen *screen, R300_TXO_MICRO_TILE(desc->microtile); } -static void r300_texture_setup_fb_state(struct r300_screen* screen, - struct r300_texture* tex) +static void r300_texture_setup_fb_state(struct r300_surface *surf) { - unsigned i; + struct r300_resource *tex = r300_resource(surf->base.texture); + unsigned level = surf->base.u.tex.level; /* Set framebuffer state. */ - if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { - for (i = 0; i <= tex->desc.b.b.last_level; i++) { - tex->fb_state.pitch[i] = - tex->desc.stride_in_pixels[i] | - R300_DEPTHMACROTILE(tex->desc.macrotile[i]) | - R300_DEPTHMICROTILE(tex->desc.microtile); - } - tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format); + if (util_format_is_depth_or_stencil(surf->base.format)) { + surf->pitch = + tex->tex.stride_in_pixels[level] | + R300_DEPTHMACROTILE(tex->tex.macrotile[level]) | + R300_DEPTHMICROTILE(tex->tex.microtile); + surf->format = r300_translate_zsformat(surf->base.format); + surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level]; + surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level]; } else { - for (i = 0; i <= tex->desc.b.b.last_level; i++) { - tex->fb_state.pitch[i] = - tex->desc.stride_in_pixels[i] | - r300_translate_colorformat(tex->desc.b.b.format) | - R300_COLOR_TILE(tex->desc.macrotile[i]) | - R300_COLOR_MICROTILE(tex->desc.microtile); - } - tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format); + surf->pitch = + tex->tex.stride_in_pixels[level] | + r300_translate_colorformat(surf->base.format) | + R300_COLOR_TILE(tex->tex.macrotile[level]) | + R300_COLOR_MICROTILE(tex->tex.microtile); + surf->format = r300_translate_out_fmt(surf->base.format); } } -void r300_texture_reinterpret_format(struct pipe_screen *screen, +boolean r300_resource_set_properties(struct pipe_screen *screen, struct pipe_resource *tex, - enum pipe_format new_format) + unsigned offset, + const struct pipe_resource *new_properties) { - struct r300_screen *r300screen = r300_screen(screen); + struct r300_screen *rscreen = r300_screen(screen); + struct r300_resource *res = r300_resource(tex); - SCREEN_DBG(r300screen, DBG_TEX, - "r300: texture_reinterpret_format: %s -> %s\n", + SCREEN_DBG(rscreen, DBG_TEX, + "r300: texture_set_properties: %s -> %s\n", util_format_short_name(tex->format), - util_format_short_name(new_format)); - - tex->format = new_format; - - r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex)); -} - -static unsigned r300_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned level, int layer) -{ - struct r300_context *r300 = r300_context(context); - struct r300_texture *rtex = (struct r300_texture *)texture; + util_format_short_name(new_properties->format)); - if (r300->rws->cs_is_buffer_referenced(r300->cs, - rtex->cs_buffer, R300_REF_CS)) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + if (!r300_texture_desc_init(rscreen, res, new_properties)) { + fprintf(stderr, "r300: ERROR: Cannot set texture properties.\n"); + return FALSE; + } + res->tex_offset = offset; + r300_texture_setup_format_state(rscreen, res, 0, &res->tx_format); - return PIPE_UNREFERENCED; + return TRUE; } static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { - struct r300_texture* tex = (struct r300_texture*)texture; - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; - int i; - - 
rws->buffer_reference(rws, &tex->buffer, NULL); - for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { - if (tex->hiz_mem[i]) - u_mmFreeMem(tex->hiz_mem[i]); - if (tex->zmask_mem[i]) - u_mmFreeMem(tex->zmask_mem[i]); - } + struct r300_resource* tex = (struct r300_resource*)texture; + r300_winsys_bo_reference(&tex->buf, NULL); FREE(tex); } -static boolean r300_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *texture, - struct winsys_handle *whandle) +boolean r300_resource_get_handle(struct pipe_screen* screen, + struct pipe_resource *texture, + struct winsys_handle *whandle) { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_resource* tex = (struct r300_resource*)texture; if (!tex) { return FALSE; } - return rws->buffer_get_handle(rws, tex->buffer, - tex->desc.stride_in_bytes[0], whandle); + return rws->buffer_get_handle(tex->buf, + tex->tex.stride_in_bytes[0], whandle); } -struct u_resource_vtbl r300_texture_vtbl = +static const struct u_resource_vtbl r300_texture_vtbl = { - r300_texture_get_handle, /* get_handle */ - r300_texture_destroy, /* resource_destroy */ - r300_texture_is_referenced, /* is_resource_referenced */ - r300_texture_get_transfer, /* get_transfer */ - r300_texture_transfer_destroy, /* transfer_destroy */ - r300_texture_transfer_map, /* transfer_map */ - u_default_transfer_flush_region, /* transfer_flush_region */ - r300_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + NULL, /* get_handle */ + r300_texture_destroy, /* resource_destroy */ + r300_texture_get_transfer, /* get_transfer */ + r300_texture_transfer_destroy, /* transfer_destroy */ + r300_texture_transfer_map, /* transfer_map */ + NULL, /* transfer_flush_region */ + r300_texture_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ }; /* The common texture constructor. */ -static struct r300_texture* +static struct r300_resource* r300_texture_create_object(struct r300_screen *rscreen, const struct pipe_resource *base, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride_in_bytes_override, unsigned max_buffer_size, - struct r300_winsys_buffer *buffer) + struct r300_winsys_bo *buffer) { struct r300_winsys_screen *rws = rscreen->rws; - struct r300_texture *tex = CALLOC_STRUCT(r300_texture); + struct r300_resource *tex = CALLOC_STRUCT(r300_resource); if (!tex) { if (buffer) - rws->buffer_reference(rws, &buffer, NULL); + r300_winsys_bo_reference(&buffer, NULL); return NULL; } - /* Initialize the descriptor. */ - if (!r300_texture_desc_init(rscreen, &tex->desc, base, - microtile, macrotile, - stride_in_bytes_override, - max_buffer_size)) { + pipe_reference_init(&tex->b.b.b.reference, 1); + tex->b.b.b.screen = &rscreen->screen; + tex->b.b.b.usage = base->usage; + tex->b.b.b.bind = base->bind; + tex->b.b.b.flags = base->flags; + tex->b.b.vtbl = &r300_texture_vtbl; + tex->tex.microtile = microtile; + tex->tex.macrotile[0] = macrotile; + tex->tex.stride_in_bytes_override = stride_in_bytes_override; + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? 
+ R300_DOMAIN_GTT : + R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + tex->buf_size = max_buffer_size; + + if (!r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base)) { if (buffer) - rws->buffer_reference(rws, &buffer, NULL); + r300_winsys_bo_reference(&buffer, NULL); FREE(tex); return NULL; } - /* Initialize the hardware state. */ - r300_texture_setup_format_state(rscreen, &tex->desc, 0, &tex->tx_format); - r300_texture_setup_fb_state(rscreen, tex); - - tex->desc.b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->desc.b.b.reference, 1); - tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? - R300_DOMAIN_GTT : - R300_DOMAIN_VRAM | R300_DOMAIN_GTT; - tex->buffer = buffer; /* Create the backing buffer if needed. */ - if (!tex->buffer) { - tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + if (!buffer) { + tex->buf_size = tex->tex.size_in_bytes; + tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, base->bind, base->usage, tex->domain); - if (!tex->buffer) { + if (!tex->buf) { FREE(tex); return NULL; } + } else { + tex->buf = buffer; } - tex->cs_buffer = rws->buffer_get_cs_handle(rws, tex->buffer); + tex->cs_buf = rws->buffer_get_cs_handle(tex->buf); - rws->buffer_set_tiling(rws, tex->buffer, - tex->desc.microtile, tex->desc.macrotile[0], - tex->desc.stride_in_bytes[0]); + rws->buffer_set_tiling(tex->buf, NULL, + tex->tex.microtile, tex->tex.macrotile[0], + tex->tex.stride_in_bytes[0]); return tex; } @@ -813,7 +849,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, { struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; struct r300_screen *rscreen = r300_screen(screen); - struct r300_winsys_buffer *buffer; + struct r300_winsys_bo *buffer; enum r300_buffer_tiling microtile, macrotile; unsigned stride, size; @@ -829,7 +865,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, if (!buffer) return NULL; - rws->buffer_get_tiling(rws, buffer, µtile, ¯otile); + rws->buffer_get_tiling(buffer, µtile, ¯otile); /* Enforce a microtiled zbuffer. */ if (util_format_is_depth_or_stencil(base->format) && @@ -840,8 +876,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) - microtile = R300_BUFFER_SQUARETILED; + microtile = R300_BUFFER_SQUARETILED; break; } } @@ -857,7 +892,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, struct pipe_resource* texture, const struct pipe_surface *surf_tmpl) { - struct r300_texture* tex = r300_texture(texture); + struct r300_resource* tex = r300_resource(texture); struct r300_surface* surface = CALLOC_STRUCT(r300_surface); unsigned level = surf_tmpl->u.tex.level; @@ -877,29 +912,28 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; - surface->buffer = tex->buffer; - surface->cs_buffer = tex->cs_buffer; + surface->buf = tex->buf; + surface->cs_buf = tex->cs_buf; /* Prefer VRAM if there are multiple domains to choose from. 
*/ surface->domain = tex->domain; if (surface->domain & R300_DOMAIN_VRAM) surface->domain &= ~R300_DOMAIN_GTT; - surface->offset = r300_texture_get_offset(&tex->desc, level, + surface->offset = r300_texture_get_offset(tex, level, surf_tmpl->u.tex.first_layer); - surface->pitch = tex->fb_state.pitch[level]; - surface->format = tex->fb_state.format; + r300_texture_setup_fb_state(surface); /* Parameters for the CBZB clear. */ - surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; + surface->cbzb_allowed = tex->tex.cbzb_allowed[level]; surface->cbzb_width = align(surface->base.width, 64); /* Height must be aligned to the size of a tile. */ - tile_height = r300_get_pixel_alignment(tex->desc.b.b.format, - tex->desc.b.b.nr_samples, - tex->desc.microtile, - tex->desc.macrotile[level], - DIM_HEIGHT); + tile_height = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], + DIM_HEIGHT, 0); surface->cbzb_height = align((surface->base.height + 1) / 2, tile_height); @@ -907,7 +941,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, /* Offset must be aligned to 2K and must point at the beginning * of a scanline. */ offset = surface->offset + - tex->desc.stride_in_bytes[level] * surface->cbzb_height; + tex->tex.stride_in_bytes[level] * surface->cbzb_height; surface->cbzb_midpoint_offset = offset & ~2047; surface->cbzb_pitch = surface->pitch & 0x1ffffc; @@ -922,8 +956,8 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, surface->cbzb_allowed ? "YES" : " NO", surface->cbzb_width, surface->cbzb_height, offset & 2047, - tex->desc.microtile ? "YES" : " NO", - tex->desc.macrotile[level] ? "YES" : " NO"); + tex->tex.microtile ? "YES" : " NO", + tex->tex.macrotile[level] ? 
"YES" : " NO"); } return &surface->base; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 0ab22f747e..158a387478 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -32,7 +32,7 @@ struct pipe_resource; struct winsys_handle; struct r300_texture_format_state; struct r300_texture_desc; -struct r300_texture; +struct r300_resource; struct r300_screen; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, @@ -46,9 +46,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, uint32_t r500_tx_format_msb_bit(enum pipe_format format); -void r300_texture_reinterpret_format(struct pipe_screen *screen, +boolean r300_resource_set_properties(struct pipe_screen *screen, struct pipe_resource *tex, - enum pipe_format new_format); + unsigned offset, + const struct pipe_resource *new_properties); boolean r300_is_colorbuffer_format_supported(enum pipe_format format); @@ -57,10 +58,14 @@ boolean r300_is_zs_format_supported(enum pipe_format format); boolean r300_is_sampler_format_supported(enum pipe_format format); void r300_texture_setup_format_state(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level, struct r300_texture_format_state *out); +boolean r300_resource_get_handle(struct pipe_screen* screen, + struct pipe_resource *texture, + struct winsys_handle *whandle); + struct pipe_resource* r300_texture_from_handle(struct pipe_screen* screen, const struct pipe_resource* base, diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index aa82c47151..2910666dd5 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -34,7 +34,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, unsigned num_samples, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, - enum r300_dim dim) + enum r300_dim dim, boolean is_rs690) { static const unsigned table[2][5][3][2] = { @@ -57,6 +57,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ } }; + static const unsigned aa_block[2] = {4, 8}; unsigned tile = 0; unsigned pixsize = util_format_get_blocksize(format); @@ -74,6 +75,14 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, } else { /* Standard alignment. */ tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; + if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) { + int align; + int h_tile; + h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT]; + align = 64 / (pixsize * h_tile); + if (tile < align) + tile = align; + } } assert(tile); @@ -81,19 +90,19 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, } /* Return true if macrotiling should be enabled on the miplevel. 
*/ -static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, +static boolean r300_texture_macro_switch(struct r300_resource *tex, unsigned level, boolean rv350_mode, enum r300_dim dim) { unsigned tile, texdim; - tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, - desc->microtile, R300_BUFFER_TILED, dim); + tile = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples, + tex->tex.microtile, R300_BUFFER_TILED, dim, 0); if (dim == DIM_WIDTH) { - texdim = u_minify(desc->width0, level); + texdim = u_minify(tex->tex.width0, level); } else { - texdim = u_minify(desc->height0, level); + texdim = u_minify(tex->tex.height0, level); } /* See TX_FILTER1_n.MACRO_SWITCH. */ @@ -109,91 +118,70 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, * at the given level. */ static unsigned r300_texture_get_stride(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level) { unsigned tile_width, width, stride; + boolean is_rs690 = (screen->caps.family == CHIP_FAMILY_RS600 || + screen->caps.family == CHIP_FAMILY_RS690 || + screen->caps.family == CHIP_FAMILY_RS740); - if (desc->stride_in_bytes_override) - return desc->stride_in_bytes_override; + if (tex->tex.stride_in_bytes_override) + return tex->tex.stride_in_bytes_override; /* Check the level. */ - if (level > desc->b.b.last_level) { + if (level > tex->b.b.b.last_level) { SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", - __FUNCTION__, level, desc->b.b.last_level); + __FUNCTION__, level, tex->b.b.b.last_level); return 0; } - width = u_minify(desc->width0, level); + width = u_minify(tex->tex.width0, level); - if (util_format_is_plain(desc->b.b.format)) { - tile_width = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], - DIM_WIDTH); + if (util_format_is_plain(tex->b.b.b.format)) { + tile_width = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], + DIM_WIDTH, is_rs690); width = align(width, tile_width); - stride = util_format_get_stride(desc->b.b.format, width); - - /* Some IGPs need a minimum stride of 64 bytes, hmm... */ - if (!desc->macrotile[level] && - (screen->caps.family == CHIP_FAMILY_RS600 || - screen->caps.family == CHIP_FAMILY_RS690 || - screen->caps.family == CHIP_FAMILY_RS740)) { - unsigned min_stride; - - if (desc->microtile) { - unsigned tile_height = - r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], - DIM_HEIGHT); - - min_stride = 64 / tile_height; - } else { - min_stride = 64; - } - - return stride < min_stride ? min_stride : stride; - } - + stride = util_format_get_stride(tex->b.b.b.format, width); /* The alignment to 32 bytes is sort of implied by the layout... */ return stride; } else { - return align(util_format_get_stride(desc->b.b.format, width), 32); + return align(util_format_get_stride(tex->b.b.b.format, width), is_rs690 ? 
64 : 32); } } -static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, +static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, unsigned level, boolean *out_aligned_for_cbzb) { unsigned height, tile_height; - height = u_minify(desc->height0, level); + height = u_minify(tex->tex.height0, level); - if (util_format_is_plain(desc->b.b.format)) { - tile_height = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], - DIM_HEIGHT); - height = align(height, tile_height); + /* Mipmapped and 3D textures must have their height aligned to POT. */ + if ((tex->b.b.b.target != PIPE_TEXTURE_1D && + tex->b.b.b.target != PIPE_TEXTURE_2D && + tex->b.b.b.target != PIPE_TEXTURE_RECT) || + tex->b.b.b.last_level != 0) { + height = util_next_power_of_two(height); + } - /* This is needed for the kernel checker, unfortunately. */ - if ((desc->b.b.target != PIPE_TEXTURE_1D && - desc->b.b.target != PIPE_TEXTURE_2D && - desc->b.b.target != PIPE_TEXTURE_RECT) || - desc->b.b.last_level != 0) { - height = util_next_power_of_two(height); - } + if (util_format_is_plain(tex->b.b.b.format)) { + tile_height = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], + DIM_HEIGHT, 0); + height = align(height, tile_height); /* See if the CBZB clear can be used on the buffer, * taking the texture size into account. */ if (out_aligned_for_cbzb) { - if (desc->macrotile[level]) { + if (tex->tex.macrotile[level]) { /* When clearing, the layer (width*height) is horizontally split * into two, and the upper and lower halves are cleared by the CB * and ZB units, respectively. Therefore, the number of macrotiles @@ -201,10 +189,10 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, /* Align the height so that there is an even number of macrotiles. * Do so for 3 or more macrotiles in the Y direction. */ - if (level == 0 && desc->b.b.last_level == 0 && - (desc->b.b.target == PIPE_TEXTURE_1D || - desc->b.b.target == PIPE_TEXTURE_2D || - desc->b.b.target == PIPE_TEXTURE_RECT) && + if (level == 0 && tex->b.b.b.last_level == 0 && + (tex->b.b.b.target == PIPE_TEXTURE_1D || + tex->b.b.b.target == PIPE_TEXTURE_2D || + tex->b.b.b.target == PIPE_TEXTURE_RECT) && height >= tile_height * 3) { height = align(height, tile_height * 2); } @@ -216,30 +204,7 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, } } - return util_format_get_nblocksy(desc->b.b.format, height); -} - -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_texture_desc *desc) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - desc->b.b.target == PIPE_TEXTURE_3D && - desc->b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= desc->b.b.last_level; i++) { - size += desc->stride_in_bytes[i] * - r300_texture_get_nblocksy(desc, i, FALSE); - } - - size *= desc->depth0; - desc->size_in_bytes = size; - } + return util_format_get_nblocksy(tex->b.b.b.format, height); } /* Get a width in pixels from a stride in bytes. 
*/ @@ -251,15 +216,15 @@ static unsigned stride_to_width(enum pipe_format format, } static void r300_setup_miptree(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, boolean align_for_cbzb) { - struct pipe_resource *base = &desc->b.b; + struct pipe_resource *base = &tex->b.b.b; unsigned stride, size, layer_size, nblocksy, i; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean aligned_for_cbzb; - desc->size_in_bytes = 0; + tex->tex.size_in_bytes = 0; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Making miptree for texture, format %s\n", @@ -267,21 +232,21 @@ static void r300_setup_miptree(struct r300_screen *screen, for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. */ - desc->macrotile[i] = - (desc->macrotile[0] == R300_BUFFER_TILED && - r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ? + tex->tex.macrotile[i] = + (tex->tex.macrotile[0] == R300_BUFFER_TILED && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? R300_BUFFER_TILED : R300_BUFFER_LINEAR; - stride = r300_texture_get_stride(screen, desc, i); + stride = r300_texture_get_stride(screen, tex, i); /* Compute the number of blocks in Y, see if the CBZB clear can be * used on the texture. */ aligned_for_cbzb = FALSE; - if (align_for_cbzb && desc->cbzb_allowed[i]) - nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb); + if (align_for_cbzb && tex->tex.cbzb_allowed[i]) + nblocksy = r300_texture_get_nblocksy(tex, i, &aligned_for_cbzb); else - nblocksy = r300_texture_get_nblocksy(desc, i, NULL); + nblocksy = r300_texture_get_nblocksy(tex, i, NULL); layer_size = stride * nblocksy; @@ -292,75 +257,182 @@ static void r300_setup_miptree(struct r300_screen *screen, if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else - size = layer_size * u_minify(desc->depth0, i); + size = layer_size * u_minify(tex->tex.depth0, i); - desc->offset_in_bytes[i] = desc->size_in_bytes; - desc->size_in_bytes = desc->offset_in_bytes[i] + size; - desc->layer_size_in_bytes[i] = layer_size; - desc->stride_in_bytes[i] = stride; - desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); - desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb; + tex->tex.offset_in_bytes[i] = tex->tex.size_in_bytes; + tex->tex.size_in_bytes = tex->tex.offset_in_bytes[i] + size; + tex->tex.layer_size_in_bytes[i] = layer_size; + tex->tex.stride_in_bytes[i] = stride; + tex->tex.stride_in_pixels[i] = stride_to_width(tex->b.b.b.format, stride); + tex->tex.cbzb_allowed[i] = tex->tex.cbzb_allowed[i] && aligned_for_cbzb; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(desc->width0, i), u_minify(desc->height0, i), - u_minify(desc->depth0, i), stride, desc->size_in_bytes, - desc->macrotile[i] ? "TRUE" : "FALSE"); + i, u_minify(tex->tex.width0, i), u_minify(tex->tex.height0, i), + u_minify(tex->tex.depth0, i), stride, tex->tex.size_in_bytes, + tex->tex.macrotile[i] ? 
"TRUE" : "FALSE"); } } -static void r300_setup_flags(struct r300_texture_desc *desc) +static void r300_setup_flags(struct r300_resource *tex) { - desc->uses_stride_addressing = - !util_is_power_of_two(desc->b.b.width0) || - (desc->stride_in_bytes_override && - stride_to_width(desc->b.b.format, - desc->stride_in_bytes_override) != desc->b.b.width0); - - desc->is_npot = - desc->uses_stride_addressing || - !util_is_power_of_two(desc->b.b.height0) || - !util_is_power_of_two(desc->b.b.depth0); + tex->tex.uses_stride_addressing = + !util_is_power_of_two(tex->b.b.b.width0) || + (tex->tex.stride_in_bytes_override && + stride_to_width(tex->b.b.b.format, + tex->tex.stride_in_bytes_override) != tex->b.b.b.width0); + + tex->tex.is_npot = + tex->tex.uses_stride_addressing || + !util_is_power_of_two(tex->b.b.b.height0) || + !util_is_power_of_two(tex->b.b.b.depth0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { unsigned i, bpp; boolean first_level_valid; - bpp = util_format_get_blocksizebits(desc->b.b.format); + bpp = util_format_get_blocksizebits(tex->b.b.b.format); /* 1) The texture must be point-sampled, * 2) The depth must be 16 or 32 bits. * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage * with certain texture sizes. Macrotiling ensures the alignment. */ - first_level_valid = desc->b.b.nr_samples <= 1 && + first_level_valid = tex->b.b.b.nr_samples <= 1 && (bpp == 16 || bpp == 32) && - desc->macrotile[0]; + tex->tex.macrotile[0]; if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB)) first_level_valid = FALSE; - for (i = 0; i <= desc->b.b.last_level; i++) - desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; + for (i = 0; i <= tex->b.b.b.last_level; i++) + tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; +} + +static unsigned r300_pixels_to_dwords(unsigned stride, + unsigned height, + unsigned xblock, unsigned yblock) +{ + return (util_align_npot(stride, xblock) * align(height, yblock)) / (xblock * yblock); +} + +static void r300_setup_hyperz_properties(struct r300_screen *screen, + struct r300_resource *tex) +{ + /* The tile size of 1 DWORD in ZMASK RAM is: + * + * GPU Pipes 4x4 mode 8x8 mode + * ------------------------------------------ + * R580 4P/1Z 32x32 64x64 + * RV570 3P/1Z 48x16 96x32 + * RV530 1P/2Z 32x16 64x32 + * 1P/1Z 16x16 32x32 + */ + static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8}; + + /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels), + * but the blocks have very weird ordering. + * + * With 2 pipes and an image of size 8xY, where Y >= 1, + * clearing 4 dwords clears blocks like this: + * + * 01012323 + * + * where numbers correspond to dword indices. The blocks are interleaved + * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels). + * + * With 4 pipes and an image of size 8xY, where Y >= 4, + * clearing 8 dwords clears blocks like this: + * 01012323 + * 45456767 + * 01012323 + * 45456767 + * where numbers correspond to dword indices. 
The blocks are interleaved + * in both directions, so the alignment must be 4x4 blocks (32x32 pixels) + */ + static unsigned hiz_align_x[4] = {8, 32, 48, 32}; + static unsigned hiz_align_y[4] = {8, 8, 8, 32}; + + if (util_format_is_depth_or_stencil(tex->b.b.b.format) && + util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + tex->tex.microtile) { + unsigned i, pipes; + + if (screen->caps.family == CHIP_FAMILY_RV530) { + pipes = screen->caps.num_z_pipes; + } else { + pipes = screen->caps.num_frag_pipes; + } + + for (i = 0; i <= tex->b.b.b.last_level; i++) { + unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height; + + stride = align(tex->tex.stride_in_pixels[i], 16); + height = u_minify(tex->b.b.b.height0, i); + + /* The 8x8 compression mode needs macrotiling. */ + zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 && + tex->tex.macrotile[i] && + tex->b.b.b.nr_samples <= 1 ? 8 : 4; + + /* Get the ZMASK buffer size in dwords. */ + zcomp_numdw = r300_pixels_to_dwords(stride, height, + zmask_blocks_x_per_dw[pipes-1] * zcompsize, + zmask_blocks_y_per_dw[pipes-1] * zcompsize); + + /* Check whether we have enough ZMASK memory. */ + if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + zcomp_numdw <= screen->caps.zmask_ram * pipes) { + tex->tex.zmask_dwords[i] = zcomp_numdw; + tex->tex.zcomp8x8[i] = zcompsize == 8; + + tex->tex.zmask_stride_in_pixels[i] = + util_align_npot(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); + } else { + tex->tex.zmask_dwords[i] = 0; + tex->tex.zcomp8x8[i] = FALSE; + tex->tex.zmask_stride_in_pixels[i] = 0; + } + + /* Now setup HIZ. */ + stride = util_align_npot(stride, hiz_align_x[pipes-1]); + height = align(height, hiz_align_y[pipes-1]); + + /* Get the HIZ buffer size in dwords. */ + hiz_numdw = (stride * height) / (8*8 * pipes); + + /* Check whether we have enough HIZ memory. */ + if (hiz_numdw <= screen->caps.hiz_ram * pipes) { + tex->tex.hiz_dwords[i] = hiz_numdw; + tex->tex.hiz_stride_in_pixels[i] = stride; + } else { + tex->tex.hiz_dwords[i] = 0; + tex->tex.hiz_stride_in_pixels[i] = 0; + } + } + } } static void r300_setup_tiling(struct r300_screen *screen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { - struct r300_winsys_screen *rws = screen->rws; - enum pipe_format format = desc->b.b.format; + enum pipe_format format = tex->b.b.b.format; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + tex->tex.microtile = R300_BUFFER_LINEAR; + tex->tex.macrotile[0] = R300_BUFFER_LINEAR; + if (!util_format_is_plain(format)) { return; } /* If height == 1, disable microtiling except for zbuffer. */ - if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) { + if (!is_zb && (tex->b.b.b.height0 == 1 || dbg_no_tiling)) { return; } @@ -369,13 +441,11 @@ static void r300_setup_tiling(struct r300_screen *screen, case 1: case 4: case 8: - desc->microtile = R300_BUFFER_TILED; + tex->tex.microtile = R300_BUFFER_TILED; break; case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - desc->microtile = R300_BUFFER_SQUARETILED; - } + tex->tex.microtile = R300_BUFFER_SQUARETILED; break; } @@ -384,104 +454,99 @@ static void r300_setup_tiling(struct r300_screen *screen, } /* Set macrotiling. 
*/ - if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) { - desc->macrotile[0] = R300_BUFFER_TILED; + if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { + tex->tex.macrotile[0] = R300_BUFFER_TILED; } } -static void r300_tex_print_info(struct r300_screen *rscreen, - struct r300_texture_desc *desc, +static void r300_tex_print_info(struct r300_resource *tex, const char *func) { fprintf(stderr, "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " "LastLevel: %i, Size: %i, Format: %s\n", func, - desc->macrotile[0] ? "YES" : " NO", - desc->microtile ? "YES" : " NO", - desc->stride_in_pixels[0], - desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0, - desc->b.b.last_level, desc->size_in_bytes, - util_format_short_name(desc->b.b.format)); + tex->tex.macrotile[0] ? "YES" : " NO", + tex->tex.microtile ? "YES" : " NO", + tex->tex.stride_in_pixels[0], + tex->b.b.b.width0, tex->b.b.b.height0, tex->b.b.b.depth0, + tex->b.b.b.last_level, tex->tex.size_in_bytes, + util_format_short_name(tex->b.b.b.format)); } boolean r300_texture_desc_init(struct r300_screen *rscreen, - struct r300_texture_desc *desc, - const struct pipe_resource *base, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, - unsigned stride_in_bytes_override, - unsigned max_buffer_size) + struct r300_resource *tex, + const struct pipe_resource *base) { - desc->b.b = *base; - desc->b.b.screen = &rscreen->screen; - desc->stride_in_bytes_override = stride_in_bytes_override; - desc->width0 = base->width0; - desc->height0 = base->height0; - desc->depth0 = base->depth0; - - r300_setup_flags(desc); + tex->b.b.b.target = base->target; + tex->b.b.b.format = base->format; + tex->b.b.b.width0 = base->width0; + tex->b.b.b.height0 = base->height0; + tex->b.b.b.depth0 = base->depth0; + tex->b.b.b.array_size = base->array_size; + tex->b.b.b.last_level = base->last_level; + tex->b.b.b.nr_samples = base->nr_samples; + tex->tex.width0 = base->width0; + tex->tex.height0 = base->height0; + tex->tex.depth0 = base->depth0; + + r300_setup_flags(tex); /* Align a 3D NPOT texture to POT. */ - if (base->target == PIPE_TEXTURE_3D && desc->is_npot) { - desc->width0 = util_next_power_of_two(desc->width0); - desc->height0 = util_next_power_of_two(desc->height0); - desc->depth0 = util_next_power_of_two(desc->depth0); + if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) { + tex->tex.width0 = util_next_power_of_two(tex->tex.width0); + tex->tex.height0 = util_next_power_of_two(tex->tex.height0); + tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0); } /* Setup tiling. */ - if (microtile == R300_BUFFER_SELECT_LAYOUT || - macrotile == R300_BUFFER_SELECT_LAYOUT) { - r300_setup_tiling(rscreen, desc); - } else { - desc->microtile = microtile; - desc->macrotile[0] = macrotile; - assert(desc->b.b.last_level == 0); + if (tex->tex.microtile == R300_BUFFER_SELECT_LAYOUT) { + r300_setup_tiling(rscreen, tex); } - r300_setup_cbzb_flags(rscreen, desc); + r300_setup_cbzb_flags(rscreen, tex); /* Setup the miptree description. */ - r300_setup_miptree(rscreen, desc, TRUE); + r300_setup_miptree(rscreen, tex, TRUE); /* If the required buffer size is larger the given max size, * try again without the alignment for the CBZB clear. 
*/ - if (max_buffer_size && desc->size_in_bytes > max_buffer_size) { - r300_setup_miptree(rscreen, desc, FALSE); + if (tex->buf_size && tex->tex.size_in_bytes > tex->buf_size) { + r300_setup_miptree(rscreen, tex, FALSE); } - r300_texture_3d_fix_mipmapping(rscreen, desc); + r300_setup_hyperz_properties(rscreen, tex); - if (max_buffer_size) { + if (tex->buf_size) { /* Make sure the buffer we got is large enough. */ - if (desc->size_in_bytes > max_buffer_size) { + if (tex->tex.size_in_bytes > tex->buf_size) { fprintf(stderr, "r300: texture_desc_init: The buffer is not " "large enough. Got: %i, Need: %i, Info:\n", - max_buffer_size, desc->size_in_bytes); - r300_tex_print_info(rscreen, desc, "texture_desc_init"); + tex->buf_size, tex->tex.size_in_bytes); + r300_tex_print_info(tex, "texture_desc_init"); return FALSE; } - desc->buffer_size_in_bytes = max_buffer_size; + tex->tex.buffer_size_in_bytes = tex->buf_size; } else { - desc->buffer_size_in_bytes = desc->size_in_bytes; + tex->tex.buffer_size_in_bytes = tex->tex.size_in_bytes; } if (SCREEN_DBG_ON(rscreen, DBG_TEX)) - r300_tex_print_info(rscreen, desc, "texture_desc_init"); + r300_tex_print_info(tex, "texture_desc_init"); return TRUE; } -unsigned r300_texture_get_offset(struct r300_texture_desc *desc, +unsigned r300_texture_get_offset(struct r300_resource *tex, unsigned level, unsigned layer) { - unsigned offset = desc->offset_in_bytes[level]; + unsigned offset = tex->tex.offset_in_bytes[level]; - switch (desc->b.b.target) { + switch (tex->b.b.b.target) { case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - return offset + layer * desc->layer_size_in_bytes[level]; + return offset + layer * tex->tex.layer_size_in_bytes[level]; default: assert(layer == 0); diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h index 44d88794a1..ce6e9643ec 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.h +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -30,7 +30,7 @@ struct pipe_resource; struct r300_screen; struct r300_texture_desc; -struct r300_texture; +struct r300_resource; enum r300_dim { DIM_WIDTH = 0, @@ -41,17 +41,13 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, unsigned num_samples, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, - enum r300_dim dim); + enum r300_dim dim, boolean is_rs690); boolean r300_texture_desc_init(struct r300_screen *rscreen, - struct r300_texture_desc *desc, - const struct pipe_resource *base, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, - unsigned stride_in_bytes_override, - unsigned max_buffer_size); - -unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + struct r300_resource *tex, + const struct pipe_resource *base); + +unsigned r300_texture_get_offset(struct r300_resource *tex, unsigned level, unsigned layer); #endif diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 15a323989b..97ec0a1a1f 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -191,7 +191,12 @@ static void transform_dstreg( dst->File = translate_register_file(src->Register.File); dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); dst->WriteMask = src->Register.WriteMask; - dst->RelAddr = src->Register.Indirect; + + if (src->Register.Indirect) { + ttr->error = TRUE; + fprintf(stderr, "r300: Relative addressing of destination operands " + "is unsupported.\n"); + } } static void 
transform_srcreg( @@ -332,6 +337,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, unsigned imm_index = 0; int i; + ttr->error = FALSE; + /* Allocate constants placeholders. * * Note: What if declared constants are not contiguous? */ diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h index 97641a954b..adb044cfe5 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -47,6 +47,9 @@ struct tgsi_to_rc { /* Vertex shaders have no half swizzles, and no way to handle them, so * until rc grows proper support, indicate if they're safe to use. */ boolean use_half_swizzles; + + /* If an error occured. */ + boolean error; }; void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 3b95af79bc..65c5095be6 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -37,7 +37,7 @@ struct r300_transfer { unsigned offset; /* Linear texture. */ - struct r300_texture *linear_texture; + struct r300_resource *linear_texture; }; /* Convenience cast wrapper. */ @@ -54,7 +54,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; struct pipe_resource *tex = transfer->resource; - ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, 0, + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->b.b.b, 0, 0, 0, 0, tex, transfer->level, &transfer->box); } @@ -70,9 +70,10 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, ctx->resource_copy_region(ctx, tex, transfer->level, transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->linear_texture->desc.b.b, 0, &src_box); + &r300transfer->linear_texture->b.b.b, 0, &src_box); - ctx->flush(ctx, 0, NULL); + /* XXX remove this. */ + r300_flush(ctx, 0, NULL); } struct pipe_transfer* @@ -83,25 +84,25 @@ r300_texture_get_transfer(struct pipe_context *ctx, const struct pipe_box *box) { struct r300_context *r300 = r300_context(ctx); - struct r300_texture *tex = r300_texture(texture); + struct r300_resource *tex = r300_resource(texture); struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; + const struct util_format_description *desc = + util_format_description(texture->format); referenced_cs = - r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_CS); + r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf); if (referenced_cs) { referenced_hw = TRUE; } else { referenced_hw = - r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_HW); + r300->rws->buffer_is_busy(tex->buf); } - blittable = ctx->screen->is_format_supported( - ctx->screen, texture->format, texture->target, 0, - PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, 0); + blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || + desc->layout == UTIL_FORMAT_LAYOUT_S3TC || + desc->layout == UTIL_FORMAT_LAYOUT_RGTC; trans = CALLOC_STRUCT(r300_transfer); if (trans) { @@ -114,13 +115,17 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. 
*/ - if (tex->desc.microtile || tex->desc.macrotile[level] || - ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { + if (tex->tex.microtile || tex->tex.macrotile[level] || + (referenced_hw && blittable && !(usage & PIPE_TRANSFER_READ))) { + if (r300->blitter->running) { + fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n"); + os_break(); + } + base.target = PIPE_TEXTURE_2D; base.format = texture->format; base.width0 = box->width; base.height0 = box->height; - /* XXX: was depth0 = 0 */ base.depth0 = 1; base.array_size = 1; base.last_level = 0; @@ -141,23 +146,23 @@ r300_texture_get_transfer(struct pipe_context *ctx, } /* Create the temporary texture. */ - trans->linear_texture = r300_texture( + trans->linear_texture = r300_resource( ctx->screen->resource_create(ctx->screen, &base)); if (!trans->linear_texture) { /* Oh crap, the thing can't create the texture. * Let's flush and try again. */ - ctx->flush(ctx, 0, NULL); + r300_flush(ctx, 0, NULL); - trans->linear_texture = r300_texture( + trans->linear_texture = r300_resource( ctx->screen->resource_create(ctx->screen, &base)); if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->desc.microtile && !tex->desc.macrotile[level]) { + if (!tex->tex.microtile && !tex->tex.macrotile[level]) { goto unpipelined; } @@ -169,18 +174,12 @@ r300_texture_get_transfer(struct pipe_context *ctx, } } - assert(!trans->linear_texture->desc.microtile && - !trans->linear_texture->desc.macrotile[0]); + assert(!trans->linear_texture->tex.microtile && + !trans->linear_texture->tex.macrotile[0]); - /* Set the stride. - * - * Even though we are using an internal texture for this, - * the transfer level, box and usage parameters still reflect - * the arguments received to get_transfer. We just do the - * right thing internally. - */ + /* Set the stride. */ trans->transfer.stride = - trans->linear_texture->desc.stride_in_bytes[0]; + trans->linear_texture->tex.stride_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is @@ -188,18 +187,19 @@ r300_texture_get_transfer(struct pipe_context *ctx, r300_copy_from_tiled_texture(ctx, trans); /* Always referenced in the blit. */ - ctx->flush(ctx, 0, NULL); + r300_flush(ctx, 0, NULL); } return &trans->transfer; } unpipelined: /* Unpipelined transfer. */ - trans->transfer.stride = tex->desc.stride_in_bytes[level]; - trans->offset = r300_texture_get_offset(&tex->desc, level, box->z); + trans->transfer.stride = tex->tex.stride_in_bytes[level]; + trans->offset = r300_texture_get_offset(tex, level, box->z); - if (referenced_cs) - ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); + if (referenced_cs && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) + r300_flush(ctx, 0, NULL); return &trans->transfer; } return NULL; @@ -228,20 +228,19 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, struct r300_context *r300 = r300_context(ctx); struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); - struct r300_texture *tex = r300_texture(transfer->resource); + struct r300_resource *tex = r300_resource(transfer->resource); char *map; - enum pipe_format format = tex->desc.b.b.format; + enum pipe_format format = tex->b.b.b.format; if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). 
*/ - return rws->buffer_map(rws, - r300transfer->linear_texture->buffer, + return rws->buffer_map(r300transfer->linear_texture->buf, r300->cs, transfer->usage); } else { /* Tiling is disabled. */ - map = rws->buffer_map(rws, tex->buffer, r300->cs, + map = rws->buffer_map(tex->buf, r300->cs, transfer->usage); if (!map) { @@ -259,11 +258,11 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); - struct r300_texture *tex = r300_texture(transfer->resource); + struct r300_resource *tex = r300_resource(transfer->resource); if (r300transfer->linear_texture) { - rws->buffer_unmap(rws, r300transfer->linear_texture->buffer); + rws->buffer_unmap(r300transfer->linear_texture->buf); } else { - rws->buffer_unmap(rws, tex->buffer); + rws->buffer_unmap(tex->buf); } } diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 78021e2c5d..b319890157 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -226,6 +226,13 @@ void r300_translate_vertex_shader(struct r300_context *r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); + if (ttr.error) { + fprintf(stderr, "r300 VP: Cannot translate a shader. " + "Using a dummy shader instead.\n"); + r300_dummy_vertex_shader(r300, vs); + return; + } + if (compiler.Base.Program.Constants.Count > 200) { compiler.Base.remove_unused_constants = TRUE; } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 0dd330d101..3a6798a542 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -28,37 +28,44 @@ * Any winsys hosting this pipe needs to implement r300_winsys_screen and then * call r300_screen_create to start things. */ +#include "r300_defines.h" + +#include "pipebuffer/pb_bufmgr.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "r300_defines.h" - #define R300_MAX_CMDBUF_DWORDS (16 * 1024) +#define R300_FLUSH_ASYNC (1 << 0) struct winsys_handle; struct r300_winsys_screen; -struct r300_winsys_buffer; /* for map/unmap etc. */ -struct r300_winsys_cs_buffer; /* for write_reloc etc. */ +#define r300_winsys_bo pb_buffer +#define r300_winsys_bo_reference(pdst, src) pb_reference(pdst, src) + +struct r300_winsys_cs_handle; /* for write_reloc etc. */ struct r300_winsys_cs { - unsigned cdw; /* Number of used dwords. */ - uint32_t *buf; /* The command buffer. */ + unsigned cdw; /* Number of used dwords. */ + uint32_t *buf; /* The command buffer. */ }; enum r300_value_id { R300_VID_PCI_ID, R300_VID_GB_PIPES, R300_VID_Z_PIPES, - R300_VID_SQUARE_TILING_SUPPORT, - R300_VID_DRM_2_3_0, - R300_VID_DRM_2_6_0, - R300_CAN_HYPERZ, -}; - -enum r300_reference_domain { /* bitfield */ - R300_REF_CS = 1, - R300_REF_HW = 2 + R300_VID_GART_SIZE, + R300_VID_VRAM_SIZE, + R300_VID_DRM_MAJOR, + R300_VID_DRM_MINOR, + R300_VID_DRM_PATCHLEVEL, + + /* These should probably go away: */ + R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ + R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask, R16F/RG16F */ + + R300_CAN_HYPERZ, /* ZMask + HiZ */ + R300_CAN_AACOMPRESS, /* CMask */ }; struct r300_winsys_screen { @@ -97,91 +104,78 @@ struct r300_winsys_screen { * \param domain A bitmask of the R300_DOMAIN_* flags. * \return The created buffer object. 
*/ - struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, + struct r300_winsys_bo *(*buffer_create)(struct r300_winsys_screen *ws, unsigned size, unsigned alignment, unsigned bind, unsigned usage, enum r300_buffer_domain domain); - struct r300_winsys_cs_buffer *(*buffer_get_cs_handle)( - struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); - - /** - * Reference a buffer object (assign with reference counting). - * - * \param ws The winsys this function is called from. - * \param pdst A destination pointer to set the source buffer to. - * \param src A source buffer object. - */ - void (*buffer_reference)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer **pdst, - struct r300_winsys_buffer *src); + struct r300_winsys_cs_handle *(*buffer_get_cs_handle)( + struct r300_winsys_bo *buf); /** * Map the entire data store of a buffer object into the client's address * space. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to map. * \param cs A command stream to flush if the buffer is referenced by it. * \param usage A bitmask of the PIPE_TRANSFER_* flags. * \return The pointer at the beginning of the buffer. */ - void *(*buffer_map)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void *(*buffer_map)(struct r300_winsys_bo *buf, struct r300_winsys_cs *cs, enum pipe_transfer_usage usage); /** * Unmap a buffer object from the client's address space. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to unmap. */ - void (*buffer_unmap)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); + void (*buffer_unmap)(struct r300_winsys_bo *buf); + + /** + * Return TRUE if a buffer object is being used by the GPU. + * + * \param buf A winsys buffer object. + */ + boolean (*buffer_is_busy)(struct r300_winsys_bo *buf); /** * Wait for a buffer object until it is not used by a GPU. This is * equivalent to a fence placed after the last command using the buffer, * and synchronizing to the fence. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to wait for. */ - void (*buffer_wait)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); + void (*buffer_wait)(struct r300_winsys_bo *buf); /** * Return tiling flags describing a memory layout of a buffer object. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to get the flags from. * \param macrotile A pointer to the return value of the microtile flag. * \param microtile A pointer to the return value of the macrotile flag. * * \note microtile and macrotile are not bitmasks! */ - void (*buffer_get_tiling)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void (*buffer_get_tiling)(struct r300_winsys_bo *buf, enum r300_buffer_tiling *microtile, enum r300_buffer_tiling *macrotile); /** * Set tiling flags describing a memory layout of a buffer object. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to set the flags for. + * \param cs A command stream to flush if the buffer is referenced by it. * \param macrotile A macrotile flag. * \param microtile A microtile flag. * \param stride A stride of the buffer in bytes, for texturing. * * \note microtile and macrotile are not bitmasks! 
*/ - void (*buffer_set_tiling)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void (*buffer_set_tiling)(struct r300_winsys_bo *buf, + struct r300_winsys_cs *cs, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride); @@ -196,7 +190,7 @@ struct r300_winsys_screen { * \param stride The returned buffer stride in bytes. * \param size The returned buffer size. */ - struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws, + struct r300_winsys_bo *(*buffer_from_handle)(struct r300_winsys_screen *ws, struct winsys_handle *whandle, unsigned *stride, unsigned *size); @@ -205,14 +199,12 @@ struct r300_winsys_screen { * Get a winsys handle from a winsys buffer. The internal structure * of the handle is platform-specific and only a winsys should access it. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to get the handle from. * \param whandle A winsys handle pointer. * \param stride A stride of the buffer in bytes, for texturing. * \return TRUE on success. */ - boolean (*buffer_get_handle)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + boolean (*buffer_get_handle)(struct r300_winsys_bo *buf, unsigned stride, struct winsys_handle *whandle); @@ -238,23 +230,22 @@ struct r300_winsys_screen { void (*cs_destroy)(struct r300_winsys_cs *cs); /** - * Add a buffer object to the list of buffers to validate. + * Add a new buffer relocation. Every relocation must first be added + * before it can be written. * - * \param cs A command stream to add buffer for validation against. - * \param buf A winsys buffer to validate. - * \param rd A read domain containing a bitmask - * of the R300_DOMAIN_* flags. - * \param wd A write domain containing a bitmask - * of the R300_DOMAIN_* flags. + * \param cs A command stream to add buffer for validation against. + * \param buf A winsys buffer to validate. + * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags. + * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ - void (*cs_add_buffer)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + void (*cs_add_reloc)(struct r300_winsys_cs *cs, + struct r300_winsys_cs_handle *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd); /** - * Revalidate all currently set up winsys buffers. - * Returns TRUE if a flush is required. + * Return TRUE if there is enough memory in VRAM and GTT for the relocs + * added so far. * * \param cs A command stream to validate. */ @@ -269,16 +260,15 @@ struct r300_winsys_screen { * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ void (*cs_write_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + struct r300_winsys_cs_handle *buf); /** * Flush a command stream. * * \param cs A command stream to flush. + * \param flags, R300_FLUSH_ASYNC or 0. */ - void (*cs_flush)(struct r300_winsys_cs *cs); + void (*cs_flush)(struct r300_winsys_cs *cs, unsigned flags); /** * Set a flush callback which is called from winsys when flush is @@ -289,28 +279,17 @@ struct r300_winsys_screen { * \param user A user pointer that will be passed to the flush callback. 
*/ void (*cs_set_flush)(struct r300_winsys_cs *cs, - void (*flush)(void *), + void (*flush)(void *ctx, unsigned flags), void *user); /** - * Reset the list of buffer objects to validate, usually called - * prior to adding buffer objects for validation. - * - * \param cs A command stream to reset buffers for. - */ - void (*cs_reset_buffers)(struct r300_winsys_cs *cs); - - /** - * Return TRUE if a buffer is referenced by a command stream or by hardware - * (i.e. is busy), based on the domain parameter. + * Return TRUE if a buffer is referenced by a command stream. * * \param cs A command stream. * \param buf A winsys buffer. - * \param domain A bitmask of the R300_REF_* enums. */ boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_reference_domain domain); + struct r300_winsys_cs_handle *buf); }; #endif /* R300_WINSYS_H */
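
The r300_setup_miptree() and r300_texture_get_offset() hunks above reduce to a simple running-offset layout: every level stores stride * nblocksy bytes per layer, the layers of a level are contiguous, and the levels are packed back to back into one buffer. The following is a minimal standalone sketch of that bookkeeping in plain C; the struct, helper names and example numbers are invented for illustration and are not the driver's actual data structures.

    #include <stdio.h>

    #define MAX_LEVELS 13

    struct miptree {
        unsigned offset_in_bytes[MAX_LEVELS];
        unsigned layer_size_in_bytes[MAX_LEVELS];
        unsigned size_in_bytes;
    };

    /* Lay the levels out back to back; a cube map stores 6 faces per level. */
    static void setup_miptree(struct miptree *t, unsigned last_level,
                              unsigned layers, const unsigned stride[],
                              const unsigned nblocksy[])
    {
        unsigned i;

        t->size_in_bytes = 0;
        for (i = 0; i <= last_level; i++) {
            unsigned layer_size = stride[i] * nblocksy[i];

            t->offset_in_bytes[i] = t->size_in_bytes;
            t->layer_size_in_bytes[i] = layer_size;
            t->size_in_bytes += layer_size * layers;
        }
    }

    /* Mirrors the 3D/cube case of r300_texture_get_offset(). */
    static unsigned get_offset(const struct miptree *t, unsigned level,
                               unsigned layer)
    {
        return t->offset_in_bytes[level] + layer * t->layer_size_in_bytes[level];
    }

    int main(void)
    {
        /* Made-up two-level cube map: per-level pitch in bytes and block rows. */
        const unsigned stride[]   = { 1024, 512 };
        const unsigned nblocksy[] = { 256, 128 };
        struct miptree t;

        setup_miptree(&t, 1, 6, stride, nblocksy);
        printf("total %u bytes, level 1 / face 3 at offset %u\n",
               t.size_in_bytes, get_offset(&t, 1, 3));
        return 0;
    }

For 3D targets the per-level layer count is additionally minified along with the depth, which the sketch above leaves out.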
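
The r300_setup_hyperz_properties() hunk sizes the per-level ZMASK and HIZ allocations by rounding the surface up to whole compression blocks and dividing by the pixels covered per RAM dword. Below is a standalone sketch of that arithmetic, assuming the two-pipe (RV530-style) rows of the tables in that hunk, where one ZMASK dword covers 8x4 blocks of 4x4 pixels and the HIZ alignment is 32x8 pixels; align_pot/align_npot merely stand in for the Mesa helpers align() and util_align_npot(), and the surface size is just an example.

    #include <stdio.h>

    static unsigned align_pot(unsigned v, unsigned a)   /* a must be a power of two */
    {
        return (v + a - 1) & ~(a - 1);
    }

    static unsigned align_npot(unsigned v, unsigned a)  /* any alignment */
    {
        return (v + a - 1) / a * a;
    }

    /* One ZMASK/HIZ dword covers an xblock x yblock pixel area. */
    static unsigned pixels_to_dwords(unsigned stride, unsigned height,
                                     unsigned xblock, unsigned yblock)
    {
        return (align_npot(stride, xblock) * align_pot(height, yblock)) /
               (xblock * yblock);
    }

    int main(void)
    {
        unsigned pipes = 2;                      /* RV530-style: two Z pipes */
        unsigned zcompsize = 4;                  /* 4x4 compression tiles */
        unsigned stride = align_pot(1024, 16);   /* example pitch in pixels, padded */
        unsigned height = 768;                   /* example height */

        /* ZMASK: 8x4 blocks per dword times the 4x4 tile size = 32x16 pixels. */
        unsigned zmask_dw = pixels_to_dwords(stride, height,
                                             8 * zcompsize, 4 * zcompsize);

        /* HIZ: one dword is 8x8 pixels; the total is split across the pipes
         * and compared against hiz_ram * pipes in the driver. */
        unsigned hiz_stride = align_npot(stride, 32);
        unsigned hiz_height = align_pot(height, 8);
        unsigned hiz_dw = (hiz_stride * hiz_height) / (8 * 8 * pipes);

        printf("1024x768: zmask %u dwords, hiz %u dwords\n", zmask_dw, hiz_dw);
        return 0;
    }

Whether a level actually gets ZMASK or HIZ then only depends on whether these dword counts fit into zmask_ram * pipes and hiz_ram * pipes, exactly as in the hunk.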
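
Finally, the r300_winsys.h changes replace r300_winsys_buffer with a pb_buffer based r300_winsys_bo and drop the add-buffer/reset-buffers pair in favour of explicit relocations. A rough driver-side usage sketch, based only on the callback signatures visible above, could look like this; the function, the sizes, and the bind/usage/domain values are placeholders, and the cs_validate callback name is assumed from its doc comment since the declaration itself is not part of the hunk.

    /* Usage sketch of the reworked winsys interface; everything here other
     * than the callback signatures from r300_winsys.h is illustrative. */
    #include "r300_winsys.h"

    void example_emit(struct r300_winsys_screen *ws, struct r300_winsys_cs *cs)
    {
        struct r300_winsys_bo *bo;
        struct r300_winsys_cs_handle *handle;

        /* r300_winsys_bo is now just a pb_buffer, so reference counting goes
         * through r300_winsys_bo_reference(), i.e. pb_reference(). */
        bo = ws->buffer_create(ws, 4096, 2048, PIPE_BIND_VERTEX_BUFFER,
                               PIPE_USAGE_DEFAULT, R300_DOMAIN_GTT);
        handle = ws->buffer_get_cs_handle(bo);

        /* Every relocation must be added before it can be written. */
        ws->cs_add_reloc(cs, handle, R300_DOMAIN_GTT, 0);

        /* The validate step now only answers "do the relocs added so far fit
         * in VRAM/GTT"; the name cs_validate is assumed here. */
        if (!ws->cs_validate(cs)) {
            ws->cs_flush(cs, 0);
            ws->cs_add_reloc(cs, handle, R300_DOMAIN_GTT, 0);
        }

        /* ... emit packets into cs->buf, advancing cs->cdw ... */
        ws->cs_write_reloc(cs, handle);

        /* Kick off the command stream without waiting for it. */
        ws->cs_flush(cs, R300_FLUSH_ASYNC);

        r300_winsys_bo_reference(&bo, NULL);
    }

The practical difference from the old interface is that buffers are tracked per relocation instead of through a separate buffer list, busy checks moved to buffer_is_busy(), and a flush can now be requested asynchronously with R300_FLUSH_ASYNC.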