diff options
Diffstat (limited to 'src/mesa')
57 files changed, 1310 insertions, 904 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 1d2e953eb1..5986cbffad 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -194,6 +194,16 @@ GLboolean brw_miptree_layout(struct intel_context *intel, } } + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. + * We don't know of similar requirements for pre-965, but given that + * those docs are silent on padding requirements in general, let's play + * it safe. + */ + if (mt->target == GL_TEXTURE_CUBE_MAP) + mt->total_height += 2; break; } diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 497f796764..068a3f3379 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -181,10 +181,20 @@ intel_region_alloc(struct intel_context *intel, dri_bo *buffer; struct intel_region *region; + /* If we're tiled, our allocations are in 8 or 32-row blocks, so + * failure to align our height means that we won't allocate enough pages. + * + * If we're untiled, we still have to align to 2 rows high because the + * data port accesses 2x2 blocks even if the bottom row isn't to be + * rendered, so failure to align means we could walk off the end of the + * GTT and fault. 
+ */ if (tiling == I915_TILING_X) height = ALIGN(height, 8); else if (tiling == I915_TILING_Y) height = ALIGN(height, 32); + else + height = ALIGN(height, 2); if (expect_accelerated_upload) { buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index e81a1b38ac..2114ce55c1 100644 --- a/src/mesa/drivers/dri/r200/Makefile +++ b/src/mesa/drivers/dri/r200/Makefile @@ -27,7 +27,8 @@ RADEON_COMMON_SOURCES = \ radeon_cs_legacy.c \ radeon_mipmap_tree.c \ radeon_span.c \ - radeon_fbo.c + radeon_fbo.c \ + radeon_queryobj.c DRIVER_SOURCES = r200_context.c \ diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index 14d6bc19c9..e63935378e 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -219,6 +219,9 @@ void r200FlushElts(GLcontext *ctx) radeon_bo_unref(rmesa->radeon.tcl.elt_dma_bo); rmesa->radeon.tcl.elt_dma_bo = NULL; + if (R200_ELT_BUF_SZ > elt_used) + radeonReturnDmaRegion(&rmesa->radeon, R200_ELT_BUF_SZ - elt_used); + if (R200_DEBUG & DEBUG_SYNC) { fprintf(stderr, "%s: Syncing\n", __FUNCTION__); radeonFinish( rmesa->radeon.glCtx ); @@ -240,22 +243,13 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, radeonEmitState(&rmesa->radeon); - rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, - 0, R200_ELT_BUF_SZ, 4, - RADEON_GEM_DOMAIN_GTT, 0); - rmesa->radeon.tcl.elt_dma_offset = 0; + radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, + &rmesa->radeon.tcl.elt_dma_offset, R200_ELT_BUF_SZ, 4); rmesa->tcl.elt_used = min_nr * 2; - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.tcl.elt_dma_bo, - RADEON_GEM_DOMAIN_GTT, 0); - if (ret) { - fprintf(stderr,"failure to revalidate BOs - badness\n"); - } - radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); retval = rmesa->radeon.tcl.elt_dma_bo->ptr + 
rmesa->radeon.tcl.elt_dma_offset; - if (R200_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s: header prim %x \n", __FUNCTION__, primitive); diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 5a6fd20d8c..ffc1a95745 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2289,8 +2289,11 @@ static GLboolean r200ValidateBuffers(GLcontext *ctx) { r200ContextPtr rmesa = R200_CONTEXT(ctx); struct radeon_renderbuffer *rrb; + struct radeon_dma_bo *dma_bo; int i, ret; + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs); rrb = radeon_get_colorbuffer(&rmesa->radeon); @@ -2323,9 +2326,12 @@ static GLboolean r200ValidateBuffers(GLcontext *ctx) RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); - if (ret) - return GL_FALSE; + dma_bo = first_elem(&rmesa->radeon.dma.reserved); + { + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, dma_bo->bo, RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + } return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 83e70b586d..1b23891140 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -39,6 +39,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/image.h" #include "main/imports.h" #include "main/macros.h" +#include "main/simple_list.h" #include "swrast/s_context.h" #include "swrast/s_fog.h" @@ -275,7 +276,7 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset) radeonEmitState(&rmesa->radeon); r200EmitVertexAOS( rmesa, rmesa->radeon.swtcl.vertex_size, - rmesa->radeon.dma.current, + first_elem(&rmesa->radeon.dma.reserved)->bo, current_offset); diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index 580370933e..ca9a8dbf8c 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -146,7 +146,7 @@ static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) { GLushort *dest = (GLushort *)(rmesa->radeon.tcl.elt_dma_bo->ptr + - rmesa->tcl.elt_used); + rmesa->radeon.tcl.elt_dma_offset + rmesa->tcl.elt_used); rmesa->tcl.elt_used += nr*2; diff --git a/src/mesa/drivers/dri/r200/radeon_queryobj.c b/src/mesa/drivers/dri/r200/radeon_queryobj.c new file mode 120000 index 0000000000..1d6ebc1c48 --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_queryobj.c @@ -0,0 +1 @@ +../radeon/radeon_queryobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r200/radeon_queryobj.h b/src/mesa/drivers/dri/r200/radeon_queryobj.h new file mode 120000 index 0000000000..8f6f842b0a --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_queryobj.h @@ -0,0 +1 @@ +../radeon/radeon_queryobj.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 77b3d168f3..188efcb7a0 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -37,7 +37,8 @@ RADEON_COMMON_SOURCES = \ radeon_mipmap_tree.c \ radeon_span.c \ radeon_fbo.c \ - radeon_buffer_objects.c + radeon_buffer_objects.c \ + radeon_queryobj.c DRIVER_SOURCES = \ radeon_screen.c \ @@ -54,7 +55,6 @@ DRIVER_SOURCES = \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ - r300_queryobj.c \ $(RADEON_COMMON_SOURCES) \ $(EGL_SOURCES) \ $(CS_SOURCES) diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c index 39cc6953ba..980ef3eaea 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -128,24 +128,24 @@ static char* r300_vs_swiz_debug[] = { static void r300_vs_op_dump(uint32_t op) { - printf(" dst: %d%s op: ", + fprintf(stderr, " dst: %d%s op: ", (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); if (op & 0x80) { if (op & 0x1) { - printf("PVS_MACRO_OP_2CLK_M2X_ADD\n"); + fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); } else { - printf(" PVS_MACRO_OP_2CLK_MADD\n"); + fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); } } else if (op & 0x40) { - printf("%s\n", r300_vs_me_ops[op & 0x1f]); + fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); } else { - printf("%s\n", r300_vs_ve_ops[op & 0x1f]); + fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); } } static void r300_vs_src_dump(uint32_t src) { - printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], src & (1 << 25) ? 
"-" : " ", r300_vs_swiz_debug[(src >> 13) & 0x7], @@ -166,11 +166,11 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs) unsigned offset = i*4; unsigned src; - printf("%d: op: 0x%08x", i, vs->body.d[offset]); + fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); r300_vs_op_dump(vs->body.d[offset]); for(src = 0; src < 3; ++src) { - printf(" src%i: 0x%08x", src, vs->body.d[offset+1+src]); + fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); r300_vs_src_dump(vs->body.d[offset+1+src]); } } diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index bd46f9acf2..1ca9eacda1 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -54,6 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_mipmap_tree.h" #include "r300_state.h" #include "radeon_reg.h" +#include "radeon_queryobj.h" /** # of dwords reserved for additional instructions that may need to be written * during flushing. 
@@ -430,6 +431,7 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; } + BEGIN_BATCH_NO_AUTOSTATE(5); OUT_BATCH(atom->cmd[0]); atom->cmd[1] &= ~0xf; atom->cmd[1] |= format; @@ -437,6 +439,7 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom OUT_BATCH(atom->cmd[2]); OUT_BATCH(atom->cmd[3]); OUT_BATCH(atom->cmd[4]); + END_BATCH(); } static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) @@ -791,6 +794,17 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0); + radeon_init_query_stateobj(&r300->radeon, R300_QUERYOBJ_CMDSIZE); + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RV530_FG_ZBREG_DEST, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL; + } else { + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_REG_DEST, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = R300_RASTER_PIPE_SELECT_ALL; + } + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZPASS_DATA, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_1] = 0; + r300->radeon.hw.is_dirty = GL_TRUE; r300->radeon.hw.all_dirty = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 91fa77a169..ca8021df16 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -64,11 +64,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" -#include "r300_queryobj.h" #include "r300_swtcl.h" #include "radeon_bocs_wrapper.h" #include "radeon_buffer_objects.h" - +#include "radeon_queryobj.h" #include "vblank.h" #include "utils.h" @@ -234,6 +233,84 @@ static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) r300->radeon.Fallback &= ~bit; } +static void r300_emit_query_finish(radeonContextPtr radeon) +{ + r300ContextPtr r300 = (r300ContextPtr)radeon; + struct radeon_query_object *query = radeon->query.current; + BATCH_LOCALS(radeon); + + BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->num_z_pipes + 2); + switch (r300->num_z_pipes) { + case 4: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 3: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 2: + if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + } else { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1); + } + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 1: + default: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + break; + } + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + END_BATCH(); + query->curr_offset += r300->num_z_pipes * sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +static void 
rv530_emit_query_finish_single_z(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(8); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + + query->curr_offset += sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +#if 0 +static void rv530_emit_query_finish_double_z(radeonContextPtr radeon) +{ + r300ContextPtr r300 = (r300ContextPtr)radeon; + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + + query->curr_offset += 2 * sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} +#endif + static void r300_init_vtbl(radeonContextPtr radeon) { radeon->vtbl.get_lock = r300_get_lock; @@ -242,6 +319,12 @@ static void r300_init_vtbl(radeonContextPtr radeon) radeon->vtbl.swtcl_flush = r300_swtcl_flush; radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms; radeon->vtbl.fallback = r300_fallback; + if (radeon->radeonScreen->chip_family == CHIP_FAMILY_RV530) + /* Single Z gives correct results on my hardware; still need to check + * whether double Z is ever needed. */ + 
radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z; + else + radeon->vtbl.emit_query_finish = r300_emit_query_finish; } static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) @@ -361,8 +444,7 @@ static void r300InitGLExtensions(GLcontext *ctx) _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } - if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries || - !r300->options.hw_tcl_enabled) { + if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) { _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); } } @@ -389,6 +471,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300ParseOptions(r300, screen); + r300->radeon.radeonScreen = screen; r300_init_vtbl(&r300->radeon); _mesa_init_driver_functions(&functions); @@ -396,7 +479,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitStateFuncs(&functions); r300InitTextureFuncs(&functions); r300InitShaderFuncs(&functions); - r300InitQueryObjFunctions(&functions); + radeonInitQueryObjFunctions(&functions); radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r300->radeon, &functions, @@ -453,8 +536,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitGLExtensions(ctx); - make_empty_list(&r300->query.not_flushed_head); - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 3ba3426608..339b304558 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -51,22 +51,6 @@ typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; -/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . - I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble - with other compilers ... GLUE! -*/ -#define WARN_ONCE(a, ...) 
{ \ - static int warn##__LINE__=1; \ - if(warn##__LINE__){ \ - fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \ - fprintf(stderr, "File %s function %s line %d\n", \ - __FILE__, __FUNCTION__, __LINE__); \ - fprintf(stderr, a, ## __VA_ARGS__);\ - fprintf(stderr, "***************************************************************************\n"); \ - warn##__LINE__=0;\ - } \ - } - #include "r300_vertprog.h" @@ -290,6 +274,12 @@ typedef struct r300_context *r300ContextPtr; #define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1) */ +#define R300_QUERYOBJ_CMD_0 0 +#define R300_QUERYOBJ_DATA_0 1 +#define R300_QUERYOBJ_CMD_1 2 +#define R300_QUERYOBJ_DATA_1 3 +#define R300_QUERYOBJ_CMDSIZE 4 + /** * Cache for hardware register state. */ @@ -380,7 +370,6 @@ struct r300_hw_state { struct radeon_state_atom border_color; } tex; struct radeon_state_atom txe; /* tex enable (4104) */ - radeonTexObj *textures[R300_MAX_TEXTURE_UNITS]; }; @@ -505,15 +494,6 @@ struct r300_index_buffer { GLuint count; }; -struct r300_query_object { - struct gl_query_object Base; - struct radeon_bo *bo; - int curr_offset; - GLboolean emitted_begin; - - /* Double linked list of not flushed query objects */ - struct r300_query_object *prev, *next; -}; /** * \brief R300 context structure. 
@@ -549,12 +529,6 @@ struct r300_context { uint32_t fallback; DECLARE_RENDERINPUTS(render_inputs_bitset); - - struct { - struct r300_query_object *current; - struct r300_query_object not_flushed_head; - } query; - int num_z_pipes; }; diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index d524d60299..dbf5384d55 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -31,12 +31,12 @@ #include "main/state.h" #include "main/api_validate.h" #include "main/enums.h" +#include "main/simple_list.h" #include "r300_reg.h" #include "r300_context.h" #include "r300_emit.h" #include "r300_render.h" -#include "r300_queryobj.h" #include "r300_state.h" #include "r300_tex.h" @@ -114,7 +114,7 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer - radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offet, size, 4); + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); - assert(r300->ind_buf.bo->ptr != NULL) + assert(r300->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { @@ -511,7 +511,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar } r300->radeon.tcl.aos_count = vbuf->num_attribs; - ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, r300->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, GL_TRUE); } @@ -583,16 +583,12 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300EmitCacheFlush(r300); radeonEmitState(&r300->radeon); - r300EmitQueryBegin(ctx); - for (i = 0; i < nr_prims; ++i) { r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode); } r300EmitCacheFlush(r300); - r300EmitQueryEnd(ctx); - r300FreeData(ctx); return GL_TRUE; 
diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index da801f42e4..7ab6928247 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -57,7 +57,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_reg.h" #include "r300_emit.h" #include "r300_context.h" -#include "r300_queryobj.h" #include "vblank.h" @@ -755,19 +754,9 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) } } -static void r300Flush(GLcontext *ctx) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - - radeonFlush(ctx); - - make_empty_list(&r300->query.not_flushed_head); -} - - void r300InitIoctlFuncs(struct dd_function_table *functions) { functions->Clear = r300Clear; functions->Finish = radeonFinish; - functions->Flush = r300Flush; + functions->Flush = radeonFlush; } diff --git a/src/mesa/drivers/dri/r300/r300_queryobj.c b/src/mesa/drivers/dri/r300/r300_queryobj.c deleted file mode 100644 index df1fb32ee7..0000000000 --- a/src/mesa/drivers/dri/r300/r300_queryobj.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright © 2008-2009 Maciej Cencora <m.cencora@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Maciej Cencora <m.cencora@gmail.com> - * - */ - -#include "r300_queryobj.h" -#include "r300_emit.h" - -#include "main/imports.h" -#include "main/simple_list.h" - -#define DDEBUG 0 - -#define PAGE_SIZE 4096 - -static void r300QueryGetResult(GLcontext *ctx, struct gl_query_object *q) -{ - struct r300_query_object *query = (struct r300_query_object *)q; - uint32_t *result; - int i; - - if (DDEBUG) fprintf(stderr, "%s: query id %d, result %d\n", __FUNCTION__, query->Base.Id, (int) query->Base.Result); - - radeon_bo_map(query->bo, GL_FALSE); - - result = query->bo->ptr; - - query->Base.Result = 0; - for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) { - query->Base.Result += result[i]; - if (DDEBUG) fprintf(stderr, "result[%d] = %d\n", i, result[i]); - } - - radeon_bo_unmap(query->bo); -} - -static struct gl_query_object * r300NewQueryObject(GLcontext *ctx, GLuint id) -{ - struct r300_query_object *query; - - query = _mesa_calloc(sizeof(struct r300_query_object)); - - query->Base.Id = id; - query->Base.Result = 0; - query->Base.Active = GL_FALSE; - query->Base.Ready = GL_TRUE; - - if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, query->Base.Id); - - return &query->Base; -} - -static void r300DeleteQuery(GLcontext *ctx, struct gl_query_object *q) -{ - struct r300_query_object *query = (struct r300_query_object *)q; - - if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); - - if (query->bo) { - radeon_bo_unref(query->bo); - } - - 
_mesa_free(query); -} - -static void r300BeginQuery(GLcontext *ctx, struct gl_query_object *q) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_query_object *query = (struct r300_query_object *)q; - - if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); - - assert(r300->query.current == NULL); - - if (!query->bo) { - query->bo = radeon_bo_open(r300->radeon.radeonScreen->bom, 0, PAGE_SIZE, PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0); - } - query->curr_offset = 0; - - r300->query.current = query; - insert_at_tail(&r300->query.not_flushed_head, query); -} - -static void r300EndQuery(GLcontext *ctx, struct gl_query_object *q) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - - if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); - - r300EmitQueryEnd(ctx); - - r300->query.current = NULL; -} - -static void r300WaitQuery(GLcontext *ctx, struct gl_query_object *q) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_query_object *tmp, *query = (struct r300_query_object *)q; - - /* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */ - { - GLboolean found = GL_FALSE; - foreach(tmp, &r300->query.not_flushed_head) { - if (tmp == query) { - found = GL_TRUE; - break; - } - } - - if (found) - ctx->Driver.Flush(ctx); - } - - if (DDEBUG) fprintf(stderr, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset); - - r300QueryGetResult(ctx, q); - - query->Base.Ready = GL_TRUE; -} - - -/** - * TODO: - * should check if bo is idle, bo there's no interface to do it - * just wait for result now - */ -static void r300CheckQuery(GLcontext *ctx, struct gl_query_object *q) -{ - if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); - - r300WaitQuery(ctx, q); -} - -void r300EmitQueryBegin(GLcontext *ctx) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_query_object *query = r300->query.current; - BATCH_LOCALS(&r300->radeon); - - if (!query || 
query->emitted_begin) - return; - - if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, query->Base.Id); - - if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); - OUT_BATCH_REGVAL(R300_ZB_ZPASS_DATA, 0); - END_BATCH(); - } else { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); - OUT_BATCH_REGVAL(R300_ZB_ZPASS_DATA, 0); - END_BATCH(); - } - - query->emitted_begin = GL_TRUE; -} - -void r300EmitQueryEnd(GLcontext *ctx) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_query_object *query = r300->query.current; - BATCH_LOCALS(&r300->radeon); - - if (!query || !query->emitted_begin) - return; - - if (DDEBUG) fprintf(stderr, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, query->Base.Id, query->bo, query->curr_offset); - - radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, - query->bo, - 0, RADEON_GEM_DOMAIN_GTT); - - if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { - BEGIN_BATCH_NO_AUTOSTATE(14); - OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); - OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); - OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); - OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); - OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); - END_BATCH(); - } else { - BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->num_z_pipes + 2); - switch (r300->num_z_pipes) { - case 4: - OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); - OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); - case 3: - 
OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2); - OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); - case 2: - if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) { - OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); - } else { - OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1); - } - OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); - case 1: - default: - OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0); - OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); - break; - } - OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); - END_BATCH(); - } - - query->curr_offset += r300->num_z_pipes * sizeof(uint32_t); - assert(query->curr_offset < PAGE_SIZE); - query->emitted_begin = GL_FALSE; -} - -void r300InitQueryObjFunctions(struct dd_function_table *functions) -{ - functions->NewQueryObject = r300NewQueryObject; - functions->DeleteQuery = r300DeleteQuery; - functions->BeginQuery = r300BeginQuery; - functions->EndQuery = r300EndQuery; - functions->CheckQuery = r300CheckQuery; - functions->WaitQuery = r300WaitQuery; -} diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 45330cda3c..e1a6fae57f 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -76,7 +76,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_tex.h" #include "r300_emit.h" #include "r300_fragprog_common.h" -#include "r300_queryobj.h" #include "r300_swtcl.h" /** diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 6081c33786..ce0666b901 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -471,7 +471,7 @@ static void r300SetEarlyZState(GLcontext * ctx) topZ = R300_ZTOP_DISABLE; else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill) topZ = R300_ZTOP_DISABLE; - else if (r300->query.current) + else if (r300->radeon.query.current) topZ = R300_ZTOP_DISABLE; if (topZ != r300->hw.zstencil_format.cmd[2]) { diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index a634cb5192..9d6f756879 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -39,6 +39,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_emit.h" #include "r300_tex.h" #include "r300_render.h" +#include "main/simple_list.h" #define EMIT_ATTR( ATTR, STYLE ) \ do { \ @@ -617,7 +618,7 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) r300_emit_scissor(ctx); r300EmitVertexAOS(rmesa, rmesa->radeon.swtcl.vertex_size, - rmesa->radeon.dma.current, + first_elem(&rmesa->radeon.dma.reserved)->bo, current_offset); r300EmitVbufPrim(rmesa, diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 6f489ace7b..f030451b28 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" +#include "main/simple_list.h" #include "r300_context.h" #include "r300_state.h" @@ -323,7 +324,7 @@ GLboolean r300ValidateBuffers(GLcontext * ctx) RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; return GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.c b/src/mesa/drivers/dri/r300/radeon_queryobj.c new file mode 120000 index 0000000000..1d6ebc1c48 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_queryobj.c @@ -0,0 +1 @@ +../radeon/radeon_queryobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.h b/src/mesa/drivers/dri/r300/radeon_queryobj.h new file mode 120000 index 0000000000..8f6f842b0a --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_queryobj.h @@ -0,0 +1 @@ +../radeon/radeon_queryobj.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 5bdc1afbf0..3c3100ab91 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -36,7 +36,8 @@ RADEON_COMMON_SOURCES = \ radeon_cs_legacy.c \ radeon_mipmap_tree.c \ radeon_span.c \ - radeon_fbo.c + radeon_fbo.c \ + radeon_queryobj.c DRIVER_SOURCES = \ radeon_screen.c \ diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index dc2fb0144a..38814b6d71 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -129,10 +129,10 @@ int r600_cs_write_reloc(struct radeon_cs *cs, } relocs[i].indices = indices; relocs[i].reloc_indices = reloc_indices; - relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1; - relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->section_cdw; - cs->section_ndw += 2; + relocs[i].indices[relocs[i].cindices - 1] = cs->cdw; + relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw; cs->section_cdw += 2; + cs->cdw += 2; return 0; } @@ -156,10 +156,10 @@ int r600_cs_write_reloc(struct radeon_cs *cs, return -ENOMEM; } - relocs[cs->crelocs].indices[0] = cs->cdw - 1; - relocs[cs->crelocs].reloc_indices[0] = cs->section_cdw; - cs->section_ndw += 2; + relocs[cs->crelocs].indices[0] = cs->cdw; + relocs[cs->crelocs].reloc_indices[0] = cs->cdw; cs->section_cdw += 2; + cs->cdw += 2; relocs[cs->crelocs].cindices = 1; cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo); cs->crelocs++; @@ -183,7 +183,14 @@ static int r600_cs_begin(struct radeon_cs *cs, return -EPIPE; } - if (cs->cdw + ndw + 32 > cs->ndw) { /* Left 32 DWORD (8 offset+pitch) spare room for reloc indices */ + cs->section = 1; + cs->section_ndw = ndw; + cs->section_cdw = 0; + cs->section_file = file; + cs->section_func = func; + cs->section_line = line; + + if (cs->cdw + ndw > cs->ndw) { uint32_t tmp, *ptr; int num = (ndw > 0x3FF) ? 
ndw : 0x3FF; @@ -196,13 +203,6 @@ static int r600_cs_begin(struct radeon_cs *cs, cs->ndw = tmp; } - cs->section = 1; - cs->section_ndw = 0; - cs->section_cdw = cs->cdw + ndw; /* start of reloc indices. */ - cs->section_file = file; - cs->section_func = func; - cs->section_line = line; - return 0; } @@ -219,8 +219,7 @@ static int r600_cs_end(struct radeon_cs *cs, } cs->section = 0; - if ( (cs->section_ndw + cs->cdw) != cs->section_cdw ) - { + if ( cs->section_ndw != cs->section_cdw ) { fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n", cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw); fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n", @@ -230,7 +229,6 @@ static int r600_cs_end(struct radeon_cs *cs, return -EPIPE; } - cs->cdw = cs->section_cdw; return 0; } @@ -453,12 +451,10 @@ struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_cont void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */ { - radeonContextPtr rmesa = &r600->radeon; - GLuint size; - rmesa->hw.max_state_size = 4000; /* rough estimate */ + radeonContextPtr rmesa = &r600->radeon; + GLuint size; - rmesa->hw.all_dirty = GL_TRUE; - rmesa->hw.is_dirty = GL_TRUE; + r600InitAtoms(r600); /* Initialize command buffer */ size = 256 * driQueryOptioni(&rmesa->optionCache, @@ -482,7 +478,7 @@ void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */ rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size); assert(rmesa->cmdbuf.cs != NULL); rmesa->cmdbuf.size = size; - + if (!rmesa->radeonScreen->kernel_mm) { radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]); radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size); diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h index 5df0cf1ab6..06eddf2eee 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h +++ 
b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -143,6 +143,9 @@ extern int r600_cs_write_reloc(struct radeon_cs *cs, static inline void r600_cs_write_dword(struct radeon_cs *cs, uint32_t dword) { cs->packets[cs->cdw++] = dword; + if (cs->section) { + cs->section_cdw++; + } } struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx); @@ -175,7 +178,6 @@ struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_cont fprintf(stderr, "(%s:%s:%d) offset : %d\n", \ __FILE__, __FUNCTION__, __LINE__, offset); \ } \ - r600_cs_write_dword(b_l_rmesa->cmdbuf.cs, offset); \ r600_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \ bo, rd, wd, flags); \ } while(0) diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 7009374b0c..4489064c0d 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -247,8 +247,6 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, */ _mesa_init_driver_functions(&functions); - r700InitChipObject(r600); /* let the eag... */ - r700InitStateFuncs(&functions); r600InitTextureFuncs(&functions); r700InitShaderFuncs(&functions); @@ -386,18 +384,4 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, return GL_TRUE; } -/* Clean our own things only, radeonDestroyContext will do every thing else. */ -void -r600DestroyContext (__DRIcontextPrivate * driContextPriv) -{ - GET_CURRENT_CONTEXT (ctx); - context_t *context = ctx ? R700_CONTEXT(ctx) : NULL; - - if (context) - FREE(context->hw.pStateList); - - radeonDestroyContext(driContextPriv); -} - - diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index 30ddce682c..a9b080baa3 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -55,28 +55,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
struct r600_context; typedef struct r600_context context_t; -GLboolean r700SendPSState(context_t *context); -GLboolean r700SendVSState(context_t *context); -GLboolean r700SendSQConfig(context_t *context); +extern GLboolean r700SendPSState(context_t *context); +extern GLboolean r700SendVSState(context_t *context); +extern GLboolean r700SendFSState(context_t *context); #include "main/mm.h" -/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . - I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble - with other compilers ... GLUE! -*/ -#define WARN_ONCE(a, ...) { \ - static int warn##__LINE__=1; \ - if(warn##__LINE__){ \ - fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \ - fprintf(stderr, "File %s function %s line %d\n", \ - __FILE__, __FUNCTION__, __LINE__); \ - fprintf(stderr, a, ## __VA_ARGS__);\ - fprintf(stderr, "***************************************************************************\n"); \ - warn##__LINE__=0;\ - } \ - } - /************ DMA BUFFERS **************/ /* The blit width for texture uploads @@ -128,6 +112,22 @@ enum RIGHT_SHIFT = 2, }; +struct r600_hw_state { + struct radeon_state_atom sq; + struct radeon_state_atom db; + struct radeon_state_atom db_target; + struct radeon_state_atom sc; + struct radeon_state_atom cl; + struct radeon_state_atom ucp; + struct radeon_state_atom su; + struct radeon_state_atom cb; + struct radeon_state_atom cb_target; + struct radeon_state_atom sx; + struct radeon_state_atom vgt; + struct radeon_state_atom spi; + struct radeon_state_atom vpt; +}; + /** * \brief R600 context structure. 
*/ @@ -137,6 +137,8 @@ struct r600_context { /* ------ */ R700_CHIP_CONTEXT hw; + struct r600_hw_state atoms; + /* Vertex buffers */ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; @@ -147,22 +149,26 @@ struct r600_context { #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) #define GL_CONTEXT(context) ((GLcontext *)(context->radeon.glCtx)) -extern void r600DestroyContext(__DRIcontextPrivate * driContextPriv); extern GLboolean r600CreateContext(const __GLcontextModes * glVisual, __DRIcontextPrivate * driContextPriv, void *sharedContextPrivate); #define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(&context->hw)) -extern GLboolean r700InitChipObject(context_t *context); -extern GLboolean r700SendContextStates(context_t *context); -extern GLboolean r700SendViewportState(context_t *context, int id); -extern GLboolean r700SendRenderTargetState(context_t *context, int id); +#define R600_NEWPRIM( rmesa ) \ +do { \ + if ( rmesa->radeon.dma.flush ) \ + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ +} while (0) + +#define R600_STATECHANGE(r600, ATOM) \ +do { \ + R600_NEWPRIM(r600); \ + r600->atoms.ATOM.dirty = GL_TRUE; \ + r600->radeon.hw.is_dirty = GL_TRUE; \ +} while(0) + extern GLboolean r700SendTextureState(context_t *context); -extern GLboolean r700SendDepthTargetState(context_t *context); -extern GLboolean r700SendUCPState(context_t *context); -extern GLboolean r700SendFSState(context_t *context); -extern void r700EmitState(GLcontext * ctx); extern GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo, @@ -178,6 +184,8 @@ extern void r700SetupVTXConstants(GLcontext * ctx, unsigned int stride, unsigned int Count); /* number of vectors in stream */ +extern void r600InitAtoms(context_t *context); + #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 #define RADEON_D_PLAYBACK_RAW 2 diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c index b695ed9583..b0c7294682 100644 --- 
a/src/mesa/drivers/dri/r600/r600_emit.c +++ b/src/mesa/drivers/dri/r600/r600_emit.c @@ -49,7 +49,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. void r600EmitCacheFlush(context_t *rmesa) { - BATCH_LOCALS(&rmesa->radeon); } GLboolean r600EmitShader(GLcontext * ctx, diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index ee9b64ee43..1057d7d8bb 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" +#include "main/simple_list.h" #include "r600_context.h" #include "r700_state.h" @@ -685,7 +686,7 @@ GLboolean r600ValidateBuffers(GLcontext * ctx) RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; return GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 0fb355a0b6..2d68f021df 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -27,6 +27,7 @@ #include "main/imports.h" #include "main/glheader.h" +#include "main/simple_list.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -40,222 +41,6 @@ #include "radeon_mipmap_tree.h" -#define LINK_STATES(reg) \ -do \ -{ \ - pStateListWork->puiValue = (unsigned int*)&(r700->reg); \ - pStateListWork->unOffset = mm##reg - ASIC_CONTEXT_BASE_INDEX; \ - pStateListWork->pNext = pStateListWork + 1; \ - pStateListWork++; \ -}while(0) - -GLboolean r700InitChipObject(context_t *context) -{ - ContextState * pStateListWork; - - R700_CHIP_CONTEXT *r700 = &context->hw; - - /* init state list */ - 
r700->pStateList = (ContextState*) MALLOC (sizeof(ContextState)*sizeof(R700_CHIP_CONTEXT)/sizeof(unsigned int)); - pStateListWork = r700->pStateList; - - // misc - LINK_STATES(TA_CNTL_AUX); - LINK_STATES(VC_ENHANCE); - LINK_STATES(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ); - LINK_STATES(DB_DEBUG); - LINK_STATES(DB_WATERMARKS); - - // SC - LINK_STATES(PA_SC_SCREEN_SCISSOR_TL); - LINK_STATES(PA_SC_SCREEN_SCISSOR_BR); - LINK_STATES(PA_SC_WINDOW_OFFSET); - LINK_STATES(PA_SC_WINDOW_SCISSOR_TL); - LINK_STATES(PA_SC_WINDOW_SCISSOR_BR); - LINK_STATES(PA_SC_CLIPRECT_RULE); - LINK_STATES(PA_SC_CLIPRECT_0_TL); - LINK_STATES(PA_SC_CLIPRECT_0_BR); - LINK_STATES(PA_SC_CLIPRECT_1_TL); - LINK_STATES(PA_SC_CLIPRECT_1_BR); - LINK_STATES(PA_SC_CLIPRECT_2_TL); - LINK_STATES(PA_SC_CLIPRECT_2_BR); - LINK_STATES(PA_SC_CLIPRECT_3_TL); - LINK_STATES(PA_SC_CLIPRECT_3_BR); - LINK_STATES(PA_SC_EDGERULE); - LINK_STATES(PA_SC_GENERIC_SCISSOR_TL); - LINK_STATES(PA_SC_GENERIC_SCISSOR_BR); - LINK_STATES(PA_SC_LINE_STIPPLE); - LINK_STATES(PA_SC_MPASS_PS_CNTL); - LINK_STATES(PA_SC_MODE_CNTL); - LINK_STATES(PA_SC_LINE_CNTL); - LINK_STATES(PA_SC_AA_CONFIG); - LINK_STATES(PA_SC_AA_SAMPLE_LOCS_MCTX); - LINK_STATES(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX); - LINK_STATES(PA_SC_AA_MASK); - - // SU - LINK_STATES(PA_SU_POINT_SIZE); - LINK_STATES(PA_SU_POINT_MINMAX); - LINK_STATES(PA_SU_LINE_CNTL); - LINK_STATES(PA_SU_SC_MODE_CNTL); - LINK_STATES(PA_SU_VTX_CNTL); - LINK_STATES(PA_SU_POLY_OFFSET_DB_FMT_CNTL); - LINK_STATES(PA_SU_POLY_OFFSET_CLAMP); - LINK_STATES(PA_SU_POLY_OFFSET_FRONT_SCALE); - LINK_STATES(PA_SU_POLY_OFFSET_FRONT_OFFSET); - LINK_STATES(PA_SU_POLY_OFFSET_BACK_SCALE); - LINK_STATES(PA_SU_POLY_OFFSET_BACK_OFFSET); - - // CL - LINK_STATES(PA_CL_CLIP_CNTL); - LINK_STATES(PA_CL_VTE_CNTL); - LINK_STATES(PA_CL_VS_OUT_CNTL); - LINK_STATES(PA_CL_NANINF_CNTL); - LINK_STATES(PA_CL_GB_VERT_CLIP_ADJ); - LINK_STATES(PA_CL_GB_VERT_DISC_ADJ); - LINK_STATES(PA_CL_GB_HORZ_CLIP_ADJ); - LINK_STATES(PA_CL_GB_HORZ_DISC_ADJ); - - // 
CB - LINK_STATES(CB_CLEAR_RED_R6XX); - LINK_STATES(CB_CLEAR_GREEN_R6XX); - LINK_STATES(CB_CLEAR_BLUE_R6XX); - LINK_STATES(CB_CLEAR_ALPHA_R6XX); - LINK_STATES(CB_TARGET_MASK); - LINK_STATES(CB_SHADER_MASK); - LINK_STATES(CB_BLEND_RED); - LINK_STATES(CB_BLEND_GREEN); - LINK_STATES(CB_BLEND_BLUE); - LINK_STATES(CB_BLEND_ALPHA); - LINK_STATES(CB_FOG_RED_R6XX); - LINK_STATES(CB_FOG_GREEN_R6XX); - LINK_STATES(CB_FOG_BLUE_R6XX); - LINK_STATES(CB_SHADER_CONTROL); - LINK_STATES(CB_COLOR_CONTROL); - LINK_STATES(CB_CLRCMP_CONTROL); - LINK_STATES(CB_CLRCMP_SRC); - LINK_STATES(CB_CLRCMP_DST); - LINK_STATES(CB_CLRCMP_MSK); - LINK_STATES(CB_BLEND_CONTROL); - - //DB - LINK_STATES(DB_HTILE_DATA_BASE); - LINK_STATES(DB_STENCIL_CLEAR); - LINK_STATES(DB_DEPTH_CLEAR); - LINK_STATES(DB_STENCILREFMASK); - LINK_STATES(DB_STENCILREFMASK_BF); - LINK_STATES(DB_DEPTH_CONTROL); - LINK_STATES(DB_SHADER_CONTROL); - LINK_STATES(DB_RENDER_CONTROL); - LINK_STATES(DB_RENDER_OVERRIDE); - LINK_STATES(DB_HTILE_SURFACE); - LINK_STATES(DB_ALPHA_TO_MASK); - - // SX - LINK_STATES(SX_MISC); - LINK_STATES(SX_ALPHA_TEST_CONTROL); - LINK_STATES(SX_ALPHA_REF); - - // VGT - LINK_STATES(VGT_MAX_VTX_INDX); - LINK_STATES(VGT_MIN_VTX_INDX); - LINK_STATES(VGT_INDX_OFFSET); - LINK_STATES(VGT_MULTI_PRIM_IB_RESET_INDX); - LINK_STATES(VGT_OUTPUT_PATH_CNTL); - LINK_STATES(VGT_HOS_CNTL); - LINK_STATES(VGT_HOS_MAX_TESS_LEVEL); - LINK_STATES(VGT_HOS_MIN_TESS_LEVEL); - LINK_STATES(VGT_HOS_REUSE_DEPTH); - LINK_STATES(VGT_GROUP_PRIM_TYPE); - LINK_STATES(VGT_GROUP_FIRST_DECR); - LINK_STATES(VGT_GROUP_DECR); - LINK_STATES(VGT_GROUP_VECT_0_CNTL); - LINK_STATES(VGT_GROUP_VECT_1_CNTL); - LINK_STATES(VGT_GROUP_VECT_0_FMT_CNTL); - LINK_STATES(VGT_GROUP_VECT_1_FMT_CNTL); - LINK_STATES(VGT_GS_MODE); - LINK_STATES(VGT_PRIMITIVEID_EN); - LINK_STATES(VGT_MULTI_PRIM_IB_RESET_EN); - LINK_STATES(VGT_INSTANCE_STEP_RATE_0); - LINK_STATES(VGT_INSTANCE_STEP_RATE_1); - LINK_STATES(VGT_STRMOUT_EN); - LINK_STATES(VGT_REUSE_OFF); - 
LINK_STATES(VGT_VTX_CNT_EN); - LINK_STATES(VGT_STRMOUT_BUFFER_EN); - - LINK_STATES(SQ_VTX_SEMANTIC_0); - LINK_STATES(SQ_VTX_SEMANTIC_1); - LINK_STATES(SQ_VTX_SEMANTIC_2); - LINK_STATES(SQ_VTX_SEMANTIC_3); - LINK_STATES(SQ_VTX_SEMANTIC_4); - LINK_STATES(SQ_VTX_SEMANTIC_5); - LINK_STATES(SQ_VTX_SEMANTIC_6); - LINK_STATES(SQ_VTX_SEMANTIC_7); - LINK_STATES(SQ_VTX_SEMANTIC_8); - LINK_STATES(SQ_VTX_SEMANTIC_9); - LINK_STATES(SQ_VTX_SEMANTIC_10); - LINK_STATES(SQ_VTX_SEMANTIC_11); - LINK_STATES(SQ_VTX_SEMANTIC_12); - LINK_STATES(SQ_VTX_SEMANTIC_13); - LINK_STATES(SQ_VTX_SEMANTIC_14); - LINK_STATES(SQ_VTX_SEMANTIC_15); - LINK_STATES(SQ_VTX_SEMANTIC_16); - LINK_STATES(SQ_VTX_SEMANTIC_17); - LINK_STATES(SQ_VTX_SEMANTIC_18); - LINK_STATES(SQ_VTX_SEMANTIC_19); - LINK_STATES(SQ_VTX_SEMANTIC_20); - LINK_STATES(SQ_VTX_SEMANTIC_21); - LINK_STATES(SQ_VTX_SEMANTIC_22); - LINK_STATES(SQ_VTX_SEMANTIC_23); - LINK_STATES(SQ_VTX_SEMANTIC_24); - LINK_STATES(SQ_VTX_SEMANTIC_25); - LINK_STATES(SQ_VTX_SEMANTIC_26); - LINK_STATES(SQ_VTX_SEMANTIC_27); - LINK_STATES(SQ_VTX_SEMANTIC_28); - LINK_STATES(SQ_VTX_SEMANTIC_29); - LINK_STATES(SQ_VTX_SEMANTIC_30); - LINK_STATES(SQ_VTX_SEMANTIC_31); - - // SPI - LINK_STATES(SPI_VS_OUT_ID_0); - LINK_STATES(SPI_VS_OUT_ID_1); - LINK_STATES(SPI_VS_OUT_ID_2); - LINK_STATES(SPI_VS_OUT_ID_3); - LINK_STATES(SPI_VS_OUT_ID_4); - LINK_STATES(SPI_VS_OUT_ID_5); - LINK_STATES(SPI_VS_OUT_ID_6); - LINK_STATES(SPI_VS_OUT_ID_7); - LINK_STATES(SPI_VS_OUT_ID_8); - LINK_STATES(SPI_VS_OUT_ID_9); - - LINK_STATES(SPI_VS_OUT_CONFIG); - LINK_STATES(SPI_THREAD_GROUPING); - LINK_STATES(SPI_PS_IN_CONTROL_0); - LINK_STATES(SPI_PS_IN_CONTROL_1); - LINK_STATES(SPI_INTERP_CONTROL_0); - LINK_STATES(SPI_INPUT_Z); - LINK_STATES(SPI_FOG_CNTL); - LINK_STATES(SPI_FOG_FUNC_SCALE); - LINK_STATES(SPI_FOG_FUNC_BIAS); - - // SQ - LINK_STATES(SQ_ESGS_RING_ITEMSIZE); - LINK_STATES(SQ_GSVS_RING_ITEMSIZE); - LINK_STATES(SQ_ESTMP_RING_ITEMSIZE); - LINK_STATES(SQ_GSTMP_RING_ITEMSIZE); - 
LINK_STATES(SQ_VSTMP_RING_ITEMSIZE); - LINK_STATES(SQ_PSTMP_RING_ITEMSIZE); - LINK_STATES(SQ_FBUF_RING_ITEMSIZE); - LINK_STATES(SQ_REDUC_RING_ITEMSIZE); - //LINK_STATES(SQ_GS_VERT_ITEMSIZE); - - pStateListWork->puiValue = (unsigned int*)&(r700->SQ_GS_VERT_ITEMSIZE); - pStateListWork->unOffset = mmSQ_GS_VERT_ITEMSIZE - ASIC_CONTEXT_BASE_INDEX; - pStateListWork->pNext = NULL; /* END OF STATE LIST */ - - return GL_TRUE; -} - GLboolean r700SendTextureState(context_t *context) { unsigned int i; @@ -276,11 +61,16 @@ GLboolean r700SendTextureState(context_t *context) RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, TC_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(9); + BEGIN_BATCH_NO_AUTOSTATE(9 + 4); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); R600_OUT_BATCH(i * 7); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); + R600_OUT_BATCH(0); /* r700->textures[i]->SQ_TEX_RESOURCE2 */ + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, bo, 0, @@ -289,9 +79,6 @@ GLboolean r700SendTextureState(context_t *context) bo, r700->textures[i]->SQ_TEX_RESOURCE3, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(5); @@ -362,22 +149,21 @@ void r700SetupVTXConstants(GLcontext * ctx, SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); - BEGIN_BATCH_NO_AUTOSTATE(9); + BEGIN_BATCH_NO_AUTOSTATE(9 + 2); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * 
FETCH_RESOURCE_STRIDE); - - R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, - paos->bo, - uSQ_VTX_CONSTANT_WORD0_0, - RADEON_GEM_DOMAIN_GTT, 0, 0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); R600_OUT_BATCH(0); R600_OUT_BATCH(0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0); - + R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, + paos->bo, + uSQ_VTX_CONSTANT_WORD0_0, + RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); COMMIT_BATCH(); @@ -408,7 +194,6 @@ int r700SetupStreams(GLcontext * ctx) END_BATCH(); COMMIT_BATCH(); - context->radeon.tcl.aos_count = 0; for(i=0; i<VERT_ATTRIB_MAX; i++) { unBit = 1 << i; @@ -429,83 +214,16 @@ int r700SetupStreams(GLcontext * ctx) (unsigned int)context->radeon.tcl.aos[j].stride * 4, (unsigned int)context->radeon.tcl.aos[j].count); j++; - context->radeon.tcl.aos_count++; } } + context->radeon.tcl.aos_count = j; return R600_FALLBACK_NONE; } -GLboolean r700SendContextStates(context_t *context) -{ - BATCH_LOCALS(&context->radeon); - - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - - ContextState * pState = r700->pStateList; - ContextState * pInit; - unsigned int toSend; - unsigned int ui; - - while(NULL != pState) - { - toSend = 1; - - pInit = pState; - - while(NULL != pState->pNext) - { - if ((pState->pNext->unOffset - pState->unOffset) > 1) - { - break; - } - else - { - pState = pState->pNext; - toSend++; - } - } - - pState = pState->pNext; - - BEGIN_BATCH_NO_AUTOSTATE(toSend + 2); - R600_OUT_BATCH_REGSEQ(((pInit->unOffset + ASIC_CONTEXT_BASE_INDEX)<<2), toSend); - for(ui=0; ui<toSend; ui++) - { - R600_OUT_BATCH(*(pInit->puiValue)); - pInit = pInit->pNext; - }; - END_BATCH(); - }; - - /* todo: - * - split this into a separate function? 
- * - only emit the ones we use - */ - BEGIN_BATCH_NO_AUTOSTATE(2 + R700_MAX_SHADER_EXPORTS); - R600_OUT_BATCH_REGSEQ(SPI_PS_INPUT_CNTL_0, R700_MAX_SHADER_EXPORTS); - for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++) - R600_OUT_BATCH(r700->SPI_PS_INPUT_CNTL[ui].u32All); - END_BATCH(); - - if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { - for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) { - if (r700->render_target[ui].enabled) { - BEGIN_BATCH_NO_AUTOSTATE(3); - R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * ui), - r700->render_target[ui].CB_BLEND0_CONTROL.u32All); - END_BATCH(); - } - } - } - - COMMIT_BATCH(); - - return GL_TRUE; -} - -GLboolean r700SendDepthTargetState(context_t *context) +static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_renderbuffer *rrb; BATCH_LOCALS(&context->radeon); @@ -513,19 +231,20 @@ GLboolean r700SendDepthTargetState(context_t *context) rrb = radeon_get_depthbuffer(&context->radeon); if (!rrb || !rrb->bo) { fprintf(stderr, "no rrb\n"); - return GL_FALSE; + return; } - BEGIN_BATCH_NO_AUTOSTATE(8); + BEGIN_BATCH_NO_AUTOSTATE(8 + 2); R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2); R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All); R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All); R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2); + R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All); + R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All, rrb->bo, r700->DB_DEPTH_BASE.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); - R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); END_BATCH(); if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) && @@ -538,32 +257,31 @@ GLboolean r700SendDepthTargetState(context_t *context) COMMIT_BATCH(); - r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, - DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); - - return GL_TRUE; } -GLboolean 
r700SendRenderTargetState(context_t *context, int id) +static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_renderbuffer *rrb; BATCH_LOCALS(&context->radeon); + int id = 0; rrb = radeon_get_colorbuffer(&context->radeon); if (!rrb || !rrb->bo) { fprintf(stderr, "no rrb\n"); - return GL_FALSE; + return; } if (id > R700_MAX_RENDER_TARGETS) - return GL_FALSE; + return; if (!r700->render_target[id].enabled) - return GL_FALSE; + return; - BEGIN_BATCH_NO_AUTOSTATE(3); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1); + R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_BASE.u32All); R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, rrb->bo, r700->render_target[id].CB_COLOR0_BASE.u32All, @@ -589,10 +307,6 @@ GLboolean r700SendRenderTargetState(context_t *context, int id) COMMIT_BATCH(); - r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, - CB_ACTION_ENA_bit | (1 << (id + 6))); - - return GL_TRUE; } GLboolean r700SendPSState(context_t *context) @@ -608,8 +322,9 @@ GLboolean r700SendPSState(context_t *context) r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); + R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, pbo, r700->ps.SQ_PGM_START_PS.u32All, @@ -624,6 +339,8 @@ GLboolean r700SendPSState(context_t *context) COMMIT_BATCH(); + r700->ps.dirty = GL_FALSE; + return GL_TRUE; } @@ -640,8 +357,9 @@ GLboolean r700SendVSState(context_t *context) r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); + R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); 
R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, pbo, r700->vs.SQ_PGM_START_VS.u32All, @@ -655,6 +373,8 @@ GLboolean r700SendVSState(context_t *context) COMMIT_BATCH(); + r700->vs.dirty = GL_FALSE; + return GL_TRUE; } @@ -680,8 +400,9 @@ GLboolean r700SendFSState(context_t *context) r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1); + R600_OUT_BATCH(r700->fs.SQ_PGM_START_FS.u32All); R600_OUT_BATCH_RELOC(r700->fs.SQ_PGM_START_FS.u32All, pbo, r700->fs.SQ_PGM_START_FS.u32All, @@ -695,19 +416,23 @@ GLboolean r700SendFSState(context_t *context) COMMIT_BATCH(); + r700->fs.dirty = GL_FALSE; + return GL_TRUE; } -GLboolean r700SendViewportState(context_t *context, int id) +static void r700SendViewportState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + int id = 0; if (id > R700_MAX_VIEWPORTS) - return GL_FALSE; + return; if (!r700->viewport[id].enabled) - return GL_FALSE; + return; BEGIN_BATCH_NO_AUTOSTATE(16); R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL + (8 * id), 2); @@ -727,15 +452,15 @@ GLboolean r700SendViewportState(context_t *context, int id) COMMIT_BATCH(); - return GL_TRUE; } -GLboolean r700SendSQConfig(context_t *context) +static void r700SendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); - BEGIN_BATCH_NO_AUTOSTATE(8); + BEGIN_BATCH_NO_AUTOSTATE(34); R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6); R600_OUT_BATCH(r700->sq_config.SQ_CONFIG.u32All); R600_OUT_BATCH(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All); @@ -743,14 +468,31 @@ GLboolean r700SendSQConfig(context_t *context) R600_OUT_BATCH(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All); 
R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All); R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All); + + R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, r700->TA_CNTL_AUX.u32All); + R600_OUT_BATCH_REGVAL(VC_ENHANCE, r700->VC_ENHANCE.u32All); + R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All); + R600_OUT_BATCH_REGVAL(DB_DEBUG, r700->DB_DEBUG.u32All); + R600_OUT_BATCH_REGVAL(DB_WATERMARKS, r700->DB_WATERMARKS.u32All); + + R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9); + R600_OUT_BATCH(r700->SQ_ESGS_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_GSVS_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_ESTMP_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_GSTMP_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_VSTMP_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_PSTMP_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_FBUF_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_REDUC_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_GS_VERT_ITEMSIZE.u32All); END_BATCH(); - COMMIT_BATCH(); - return GL_TRUE; + COMMIT_BATCH(); } -GLboolean r700SendUCPState(context_t *context) +static void r700SendUCPState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); int i; @@ -767,7 +509,366 @@ GLboolean r700SendUCPState(context_t *context) COMMIT_BATCH(); } } +} - return GL_TRUE; +static void r700SendSPIState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + unsigned int ui; + + BEGIN_BATCH_NO_AUTOSTATE(59 + R700_MAX_SHADER_EXPORTS); + + R600_OUT_BATCH_REGSEQ(SQ_VTX_SEMANTIC_0, 32); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_0.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_1.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_2.u32All); + 
R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_3.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_4.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_5.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_6.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_7.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_8.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_9.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_10.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_11.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_12.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_13.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_14.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_15.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_16.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_17.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_18.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_19.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_20.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_21.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_22.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_23.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_24.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_25.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_26.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_27.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_28.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_29.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_30.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_31.u32All); + + R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_ID_0, 10); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_0.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_1.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_2.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_3.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_4.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_5.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_6.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_7.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_8.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_9.u32All); + 
+ R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_CONFIG, 9); + R600_OUT_BATCH(r700->SPI_VS_OUT_CONFIG.u32All); + R600_OUT_BATCH(r700->SPI_THREAD_GROUPING.u32All); + R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_0.u32All); + R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_1.u32All); + R600_OUT_BATCH(r700->SPI_INTERP_CONTROL_0.u32All); + R600_OUT_BATCH(r700->SPI_INPUT_Z.u32All); + R600_OUT_BATCH(r700->SPI_FOG_CNTL.u32All); + R600_OUT_BATCH(r700->SPI_FOG_FUNC_SCALE.u32All); + R600_OUT_BATCH(r700->SPI_FOG_FUNC_BIAS.u32All); + + R600_OUT_BATCH_REGSEQ(SPI_PS_INPUT_CNTL_0, R700_MAX_SHADER_EXPORTS); + for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++) + R600_OUT_BATCH(r700->SPI_PS_INPUT_CNTL[ui].u32All); + + END_BATCH(); + COMMIT_BATCH(); } +static void r700SendVGTState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(41); + + R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4); + R600_OUT_BATCH(r700->VGT_MAX_VTX_INDX.u32All); + R600_OUT_BATCH(r700->VGT_MIN_VTX_INDX.u32All); + R600_OUT_BATCH(r700->VGT_INDX_OFFSET.u32All); + R600_OUT_BATCH(r700->VGT_MULTI_PRIM_IB_RESET_INDX.u32All); + + R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13); + R600_OUT_BATCH(r700->VGT_OUTPUT_PATH_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_HOS_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_HOS_MAX_TESS_LEVEL.u32All); + R600_OUT_BATCH(r700->VGT_HOS_MIN_TESS_LEVEL.u32All); + R600_OUT_BATCH(r700->VGT_HOS_REUSE_DEPTH.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_PRIM_TYPE.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_FIRST_DECR.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_DECR.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_FMT_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_FMT_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_GS_MODE.u32All); + + R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, 
r700->VGT_PRIMITIVEID_EN.u32All); + R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, r700->VGT_MULTI_PRIM_IB_RESET_EN.u32All); + R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, r700->VGT_INSTANCE_STEP_RATE_0.u32All); + R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, r700->VGT_INSTANCE_STEP_RATE_1.u32All); + + R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3); + R600_OUT_BATCH(r700->VGT_STRMOUT_EN.u32All); + R600_OUT_BATCH(r700->VGT_REUSE_OFF.u32All); + R600_OUT_BATCH(r700->VGT_VTX_CNT_EN.u32All); + + R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, r700->VGT_STRMOUT_BUFFER_EN.u32All); + + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendSXState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(9); + R600_OUT_BATCH_REGVAL(SX_MISC, r700->SX_MISC.u32All); + R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, r700->SX_ALPHA_TEST_CONTROL.u32All); + R600_OUT_BATCH_REGVAL(SX_ALPHA_REF, r700->SX_ALPHA_REF.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(27); + R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All); + + R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2); + R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All); + R600_OUT_BATCH(r700->DB_DEPTH_CLEAR.u32All); + + R600_OUT_BATCH_REGSEQ(DB_STENCILREFMASK, 2); + R600_OUT_BATCH(r700->DB_STENCILREFMASK.u32All); + R600_OUT_BATCH(r700->DB_STENCILREFMASK_BF.u32All); + + R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, r700->DB_DEPTH_CONTROL.u32All); + R600_OUT_BATCH_REGVAL(DB_SHADER_CONTROL, r700->DB_SHADER_CONTROL.u32All); + + R600_OUT_BATCH_REGSEQ(DB_RENDER_CONTROL, 2); + R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All); + 
R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All); + + R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All); + R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All); + + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendCBState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + unsigned int ui; + + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { + BEGIN_BATCH_NO_AUTOSTATE(14); + R600_OUT_BATCH_REGSEQ(CB_CLEAR_RED, 4); + R600_OUT_BATCH(r700->CB_CLEAR_RED_R6XX.u32All); + R600_OUT_BATCH(r700->CB_CLEAR_GREEN_R6XX.u32All); + R600_OUT_BATCH(r700->CB_CLEAR_BLUE_R6XX.u32All); + R600_OUT_BATCH(r700->CB_CLEAR_ALPHA_R6XX.u32All); + R600_OUT_BATCH_REGSEQ(CB_FOG_RED, 3); + R600_OUT_BATCH(r700->CB_FOG_RED_R6XX.u32All); + R600_OUT_BATCH(r700->CB_FOG_GREEN_R6XX.u32All); + R600_OUT_BATCH(r700->CB_FOG_BLUE_R6XX.u32All); + /* R600 does not have per-MRT blend */ + R600_OUT_BATCH_REGVAL(CB_BLEND_CONTROL, r700->CB_BLEND_CONTROL.u32All); + END_BATCH(); + } + + BEGIN_BATCH_NO_AUTOSTATE(22); + R600_OUT_BATCH_REGSEQ(CB_TARGET_MASK, 2); + R600_OUT_BATCH(r700->CB_TARGET_MASK.u32All); + R600_OUT_BATCH(r700->CB_SHADER_MASK.u32All); + + R600_OUT_BATCH_REGSEQ(CB_BLEND_RED, 4); + R600_OUT_BATCH(r700->CB_BLEND_RED.u32All); + R600_OUT_BATCH(r700->CB_BLEND_GREEN.u32All); + R600_OUT_BATCH(r700->CB_BLEND_BLUE.u32All); + R600_OUT_BATCH(r700->CB_BLEND_ALPHA.u32All); + + R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, r700->CB_SHADER_CONTROL.u32All); + R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, r700->CB_COLOR_CONTROL.u32All); + + R600_OUT_BATCH_REGSEQ(CB_CLRCMP_CONTROL, 4); + R600_OUT_BATCH(r700->CB_CLRCMP_CONTROL.u32All); + R600_OUT_BATCH(r700->CB_CLRCMP_SRC.u32All); + R600_OUT_BATCH(r700->CB_CLRCMP_DST.u32All); + R600_OUT_BATCH(r700->CB_CLRCMP_MSK.u32All); + END_BATCH(); + + if 
(context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { + for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) { + if (r700->render_target[ui].enabled) { + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * ui), + r700->render_target[ui].CB_BLEND0_CONTROL.u32All); + END_BATCH(); + } + } + } + + COMMIT_BATCH(); + +} + +static void r700SendSUState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(19); + R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, r700->PA_SU_SC_MODE_CNTL.u32All); + + R600_OUT_BATCH_REGSEQ(PA_SU_POINT_SIZE, 4); + R600_OUT_BATCH(r700->PA_SU_POINT_SIZE.u32All); + R600_OUT_BATCH(r700->PA_SU_POINT_MINMAX.u32All); + R600_OUT_BATCH(r700->PA_SU_LINE_CNTL.u32All); + R600_OUT_BATCH(r700->PA_SU_VTX_CNTL.u32All); + + R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_CLAMP.u32All); + + R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_FRONT_SCALE, 4); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_SCALE.u32All); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.u32All); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_SCALE.u32All); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_OFFSET.u32All); + + END_BATCH(); + COMMIT_BATCH(); + +} + +static void r700SendCLState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(18); + R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, r700->PA_CL_CLIP_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, r700->PA_CL_VTE_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, r700->PA_CL_VS_OUT_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_CL_NANINF_CNTL, r700->PA_CL_NANINF_CNTL.u32All); + + 
R600_OUT_BATCH_REGSEQ(PA_CL_GB_VERT_CLIP_ADJ, 4); + R600_OUT_BATCH(r700->PA_CL_GB_VERT_CLIP_ADJ.u32All); + R600_OUT_BATCH(r700->PA_CL_GB_VERT_DISC_ADJ.u32All); + R600_OUT_BATCH(r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All); + R600_OUT_BATCH(r700->PA_CL_GB_HORZ_DISC_ADJ.u32All); + + END_BATCH(); + COMMIT_BATCH(); +} + +// XXX need to split this up +static void r700SendSCState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(47); + R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2); + R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_BR.u32All); + + R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 13); + R600_OUT_BATCH(r700->PA_SC_WINDOW_OFFSET.u32All); + R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_BR.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_RULE.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_BR.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_BR.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_BR.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_BR.u32All); + R600_OUT_BATCH(r700->PA_SC_EDGERULE.u32All); + + R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2); + R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_BR.u32All); + + R600_OUT_BATCH_REGVAL(PA_SC_LINE_STIPPLE, r700->PA_SC_LINE_STIPPLE.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_MPASS_PS_CNTL, r700->PA_SC_MPASS_PS_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_MODE_CNTL, r700->PA_SC_MODE_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_LINE_CNTL, r700->PA_SC_LINE_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, 
r700->PA_SC_AA_CONFIG.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_MCTX.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_AA_MASK, r700->PA_SC_AA_MASK.u32All); + + END_BATCH(); + COMMIT_BATCH(); +} + +static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) +{ + return atom->cmd_size; +} + +#define ALLOC_STATE( ATOM, SZ, EMIT ) \ +do { \ + context->atoms.ATOM.cmd_size = (SZ); \ + context->atoms.ATOM.cmd = NULL; \ + context->atoms.ATOM.name = #ATOM; \ + context->atoms.ATOM.idx = 0; \ + context->atoms.ATOM.check = check_always; \ + context->atoms.ATOM.dirty = GL_FALSE; \ + context->atoms.ATOM.emit = (EMIT); \ + context->radeon.hw.max_state_size += (SZ); \ + insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \ +} while (0) + +void r600InitAtoms(context_t *context) +{ + + /* Setup the atom linked list */ + make_empty_list(&context->radeon.hw.atomlist); + context->radeon.hw.atomlist.name = "atom-list"; + + ALLOC_STATE(sq, 34, r700SendSQConfig); + ALLOC_STATE(db, 27, r700SendDBState); + ALLOC_STATE(db_target, 19, r700SendDepthTargetState); + ALLOC_STATE(sc, 47, r700SendSCState); + ALLOC_STATE(cl, 18, r700SendCLState); + ALLOC_STATE(ucp, 36, r700SendUCPState); + ALLOC_STATE(su, 19, r700SendSUState); + ALLOC_STATE(cb, 39, r700SendCBState); + ALLOC_STATE(cb_target, 32, r700SendRenderTargetState); + ALLOC_STATE(sx, 9, r700SendSXState); + ALLOC_STATE(vgt, 41, r700SendVGTState); + ALLOC_STATE(spi, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); + ALLOC_STATE(vpt, 16, r700SendViewportState); + + context->radeon.hw.is_dirty = GL_TRUE; + context->radeon.hw.all_dirty = GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h index 4e89c75f2f..c0ec4b0dd5 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.h +++ b/src/mesa/drivers/dri/r600/r700_chip.h @@ -188,6 +188,7 @@ 
typedef struct _RENDER_TARGET_STATE_STRUCT union UINT_FLOAT CB_COLOR0_MASK; /* 0xA040 */ union UINT_FLOAT CB_BLEND0_CONTROL; /* 0xA1E0 */ GLboolean enabled; + GLboolean dirty; } RENDER_TARGET_STATE_STRUCT; typedef struct _VIEWPORT_STATE_STRUCT @@ -203,6 +204,7 @@ typedef struct _VIEWPORT_STATE_STRUCT union UINT_FLOAT PA_CL_VPORT_ZSCALE; /* 0xA113 */ union UINT_FLOAT PA_CL_VPORT_ZOFFSET; /* 0xA114 */ GLboolean enabled; + GLboolean dirty; } VIEWPORT_STATE_STRUCT; typedef struct _UCP_STATE_STRUCT @@ -212,6 +214,7 @@ typedef struct _UCP_STATE_STRUCT union UINT_FLOAT PA_CL_UCP_0_Z; union UINT_FLOAT PA_CL_UCP_0_W; GLboolean enabled; + GLboolean dirty; } UCP_STATE_STRUCT; typedef struct _PS_STATE_STRUCT @@ -220,6 +223,7 @@ typedef struct _PS_STATE_STRUCT union UINT_FLOAT SQ_PGM_RESOURCES_PS ; /* 0xA214 */ union UINT_FLOAT SQ_PGM_EXPORTS_PS ; /* 0xA215 */ union UINT_FLOAT SQ_PGM_CF_OFFSET_PS ; /* 0xA233 */ + GLboolean dirty; } PS_STATE_STRUCT; typedef struct _VS_STATE_STRUCT @@ -227,6 +231,7 @@ typedef struct _VS_STATE_STRUCT union UINT_FLOAT SQ_PGM_START_VS ; /* 0xA216 */ union UINT_FLOAT SQ_PGM_RESOURCES_VS ; /* 0xA21A */ union UINT_FLOAT SQ_PGM_CF_OFFSET_VS ; /* 0xA234 */ + GLboolean dirty; } VS_STATE_STRUCT; typedef struct _GS_STATE_STRUCT @@ -234,6 +239,7 @@ typedef struct _GS_STATE_STRUCT union UINT_FLOAT SQ_PGM_START_GS ; /* 0xA21B */ union UINT_FLOAT SQ_PGM_RESOURCES_GS ; /* 0xA21F */ union UINT_FLOAT SQ_PGM_CF_OFFSET_GS ; /* 0xA235 */ + GLboolean dirty; } GS_STATE_STRUCT; typedef struct _ES_STATE_STRUCT @@ -241,6 +247,7 @@ typedef struct _ES_STATE_STRUCT union UINT_FLOAT SQ_PGM_START_ES ; /* 0xA220 */ union UINT_FLOAT SQ_PGM_RESOURCES_ES ; /* 0xA224 */ union UINT_FLOAT SQ_PGM_CF_OFFSET_ES ; /* 0xA236 */ + GLboolean dirty; } ES_STATE_STRUCT; typedef struct _FS_STATE_STRUCT @@ -248,6 +255,7 @@ typedef struct _FS_STATE_STRUCT union UINT_FLOAT SQ_PGM_START_FS ; /* 0xA225 */ union UINT_FLOAT SQ_PGM_RESOURCES_FS ; /* 0xA229 */ union UINT_FLOAT SQ_PGM_CF_OFFSET_FS ; /* 
0xA237 */ + GLboolean dirty; } FS_STATE_STRUCT; typedef struct _SQ_CONFIG_STRUCT @@ -260,27 +268,14 @@ typedef struct _SQ_CONFIG_STRUCT union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_2 ; /* 0x2305 */ } SQ_CONFIG_STRUCT; -typedef struct ContextState -{ - unsigned int * puiValue; - unsigned int unOffset; - struct ContextState * pNext; -} ContextState; - typedef struct _R700_CHIP_CONTEXT { - // misc - union UINT_FLOAT TA_CNTL_AUX ; /* 0x2542 */ - union UINT_FLOAT VC_ENHANCE ; /* 0x25C5 */ - union UINT_FLOAT SQ_DYN_GPR_CNTL_PS_FLUSH_REQ; /* 0x2363 */ - union UINT_FLOAT DB_DEBUG ; /* 0x260C */ - union UINT_FLOAT DB_WATERMARKS ; /* 0x260E */ - // DB union UINT_FLOAT DB_DEPTH_SIZE ; /* 0xA000 */ union UINT_FLOAT DB_DEPTH_VIEW ; /* 0xA001 */ union UINT_FLOAT DB_DEPTH_BASE ; /* 0xA003 */ union UINT_FLOAT DB_DEPTH_INFO ; /* 0xA004 */ + GLboolean db_target_dirty; union UINT_FLOAT DB_HTILE_DATA_BASE ; /* 0xA005 */ union UINT_FLOAT DB_STENCIL_CLEAR ; /* 0xA00A */ union UINT_FLOAT DB_DEPTH_CLEAR ; /* 0xA00B */ @@ -292,6 +287,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT DB_ALPHA_TO_MASK ; /* 0xA351 */ union UINT_FLOAT DB_DEPTH_CONTROL ; /* 0xA200 */ union UINT_FLOAT DB_SHADER_CONTROL ; /* 0xA203 */ + GLboolean db_dirty; // SC union UINT_FLOAT PA_SC_SCREEN_SCISSOR_TL ; /* 0xA00C */ @@ -311,6 +307,8 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT PA_SC_EDGERULE ; /* 0xA08C */ union UINT_FLOAT PA_SC_GENERIC_SCISSOR_TL ; /* 0xA090 */ union UINT_FLOAT PA_SC_GENERIC_SCISSOR_BR ; /* 0xA091 */ + GLboolean scissor_dirty; + union UINT_FLOAT PA_SC_LINE_STIPPLE ; /* 0xA283 */ union UINT_FLOAT PA_SC_LINE_CNTL ; /* 0xA300 */ union UINT_FLOAT PA_SC_AA_CONFIG ; /* 0xA301 */ @@ -319,6 +317,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_MCTX ; /* 0xA307 */ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX; /* 0xA308 */ union UINT_FLOAT PA_SC_AA_MASK ; /* 0xA312 */ + GLboolean sc_dirty; // CL union UINT_FLOAT PA_CL_CLIP_CNTL ; /* 0xA204 */ @@ -329,6 +328,7 @@ 
typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT PA_CL_GB_VERT_DISC_ADJ ; /* 0xA304 */ union UINT_FLOAT PA_CL_GB_HORZ_CLIP_ADJ ; /* 0xA305 */ union UINT_FLOAT PA_CL_GB_HORZ_DISC_ADJ ; /* 0xA306 */ + GLboolean cl_dirty; // SU union UINT_FLOAT PA_SU_SC_MODE_CNTL ; /* 0xA205 */ @@ -342,6 +342,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_OFFSET; /* 0xA381 */ union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_SCALE; /* 0xA382 */ union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_OFFSET; /* 0xA383 */ + GLboolean su_dirty; VIEWPORT_STATE_STRUCT viewport[R700_MAX_VIEWPORTS]; UCP_STATE_STRUCT ucp[R700_MAX_UCP]; @@ -367,12 +368,14 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT CB_CLRCMP_DST ; /* 0xA30E */ union UINT_FLOAT CB_CLRCMP_MSK ; /* 0xA30F */ union UINT_FLOAT CB_BLEND_CONTROL ; /* 0xABD0 */ + GLboolean cb_dirty; RENDER_TARGET_STATE_STRUCT render_target[R700_MAX_RENDER_TARGETS]; // SX union UINT_FLOAT SX_MISC ; /* 0xA0D4 */ union UINT_FLOAT SX_ALPHA_TEST_CONTROL ; /* 0xA104 */ union UINT_FLOAT SX_ALPHA_REF ; /* 0xA10E */ + GLboolean sx_dirty; // VGT union UINT_FLOAT VGT_MAX_VTX_INDX ; /* 0xA100 */ @@ -400,6 +403,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT VGT_REUSE_OFF ; /* 0xA2AD */ union UINT_FLOAT VGT_VTX_CNT_EN ; /* 0xA2AE */ union UINT_FLOAT VGT_STRMOUT_BUFFER_EN ; /* 0xA2C8 */ + GLboolean vgt_dirty; // SPI union UINT_FLOAT SPI_VS_OUT_ID_0 ; /* 0xA185 */ @@ -454,8 +458,8 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT SQ_VTX_SEMANTIC_29 ; /* 0xA0FD */ union UINT_FLOAT SQ_VTX_SEMANTIC_30 ; /* 0xA0FE */ union UINT_FLOAT SQ_VTX_SEMANTIC_31 ; /* 0xA0FF */ - - union UINT_FLOAT SPI_PS_INPUT_CNTL[R700_MAX_SHADER_EXPORTS]; + union UINT_FLOAT SPI_PS_INPUT_CNTL[R700_MAX_SHADER_EXPORTS]; + GLboolean spi_dirty; // shaders PS_STATE_STRUCT ps; @@ -466,7 +470,12 @@ typedef struct _R700_CHIP_CONTEXT // SQ CONFIG SQ_CONFIG_STRUCT sq_config; - + // misc + union UINT_FLOAT TA_CNTL_AUX ; /* 0x2542 */ + union UINT_FLOAT VC_ENHANCE ; /* 0x25C5 
*/ + union UINT_FLOAT SQ_DYN_GPR_CNTL_PS_FLUSH_REQ; /* 0x2363 */ + union UINT_FLOAT DB_DEBUG ; /* 0x260C */ + union UINT_FLOAT DB_WATERMARKS ; /* 0x260E */ // SQ union UINT_FLOAT SQ_ESGS_RING_ITEMSIZE ; /* 0xA22A */ union UINT_FLOAT SQ_GSVS_RING_ITEMSIZE ; /* 0xA22B */ @@ -477,8 +486,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT SQ_FBUF_RING_ITEMSIZE ; /* 0xA230 */ union UINT_FLOAT SQ_REDUC_RING_ITEMSIZE ; /* 0xA231 */ union UINT_FLOAT SQ_GS_VERT_ITEMSIZE ; /* 0xA232 */ - - ContextState* pStateList; + GLboolean sq_dirty; radeonTexObj* textures[R700_TEXTURE_NUMBERUNITS]; diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index 05d4af331e..6d4ea90ccc 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -46,12 +46,6 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask) return GL_FALSE; } -#define R600_NEWPRIM( rmesa ) \ - do { \ - if ( rmesa->radeon.dma.flush ) \ - rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ - } while (0) - void r700Clear(GLcontext * ctx, GLbitfield mask) { context_t *context = R700_CONTEXT(ctx); diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.c b/src/mesa/drivers/dri/r600/r700_ioctl.c index 23cc128d6d..72a8978976 100644 --- a/src/mesa/drivers/dri/r600/r700_ioctl.c +++ b/src/mesa/drivers/dri/r600/r700_ioctl.c @@ -31,6 +31,7 @@ #include "main/imports.h" #include "main/macros.h" #include "main/context.h" +#include "main/simple_list.h" #include "swrast/swrast.h" #include "radeon_common.h" @@ -40,33 +41,10 @@ #include "r700_ioctl.h" #include "r700_clear.h" -static void r700Flush(GLcontext *ctx) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw); - - /* okay if we have no cmds in the buffer && - we have no DMA flush && - we have no DMA buffer allocated. - then no point flushing anything at all. 
- */ - if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && !radeon->dma.current) - return; - - if (radeon->dma.flush) - radeon->dma.flush( ctx ); - - r700EmitState(ctx); - - if (radeon->cmdbuf.cs->cdw) - rcommonFlushCmdBuf(radeon, __FUNCTION__); -} void r700InitIoctlFuncs(struct dd_function_table *functions) { functions->Clear = r700Clear; functions->Finish = radeonFinish; - functions->Flush = r700Flush; + functions->Flush = radeonFlush; } diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 6985bd4ffa..f0cd357c76 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -166,15 +166,16 @@ GLboolean r700SyncSurf(context_t *context, else cp_coher_size = ((pbo->size + 255) >> 8); - BEGIN_BATCH_NO_AUTOSTATE(5); + BEGIN_BATCH_NO_AUTOSTATE(5 + 2); R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); R600_OUT_BATCH(sync_type); R600_OUT_BATCH(cp_coher_size); + R600_OUT_BATCH(0); + R600_OUT_BATCH(10); R600_OUT_BATCH_RELOC(0, pbo, 0, read_domain, write_domain, 0); // ??? 
- R600_OUT_BATCH(10); END_BATCH(); COMMIT_BATCH(); @@ -331,42 +332,28 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim } -void r700EmitState(GLcontext * ctx) -{ - context_t *context = R700_CONTEXT(ctx); - radeonContextPtr radeon = &context->radeon; - - if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty) - return; - - rcommonEnsureCmdBufSpace(&context->radeon, - context->radeon.hw.max_state_size, __FUNCTION__); - - r700SendSQConfig(context); - - r700SendUCPState(context); - r700SendContextStates(context); - r700SendViewportState(context, 0); - r700SendRenderTargetState(context, 0); - r700SendDepthTargetState(context); - -} - static GLboolean r700RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { context_t *context = R700_CONTEXT(ctx); - unsigned int i; + radeonContextPtr radeon = &context->radeon; + unsigned int i, ind_count = 0, id = 0; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; + struct radeon_renderbuffer *rrb; - r700Start3D(context); + for (i = 0; i < vb->PrimitiveCount; i++) + ind_count += vb->Primitive[i].count + 10; + + /* just an estimate, need to properly calculate this */ + rcommonEnsureCmdBufSpace(&context->radeon, + radeon->hw.max_state_size + ind_count + 1000, __FUNCTION__); + r700Start3D(context); r700UpdateShaders(ctx); r700SetScissor(context); r700SetupShaders(ctx); - - r700EmitState(ctx); + radeonEmitState(radeon); /* richard test code */ for (i = 0; i < vb->PrimitiveCount; i++) { @@ -379,6 +366,16 @@ static GLboolean r700RunRender(GLcontext * ctx, /* Flush render op cached for last several quads. 
*/ r700WaitForIdleClean(context); + rrb = radeon_get_colorbuffer(&context->radeon); + if (rrb && rrb->bo) /* sync only when a color buffer with a backing BO exists */ + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + rrb = radeon_get_depthbuffer(&context->radeon); + if (rrb && rrb->bo) /* same guard for the depth buffer; never dereference a NULL rrb */ + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); + radeonReleaseArrays(ctx, ~0); return GL_FALSE; diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 835b5e18c2..6b44cc0ceb 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -148,8 +148,15 @@ void r700UpdateViewportOffset(GLcontext * ctx) //------------------ GLfloat tx = v[MAT_TX] + xoffset; GLfloat ty = (-v[MAT_TY]) + yoffset; - r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; - r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty; + if (r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All != tx || + r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All != ty) { + /* Note: this should also modify whatever data the context reset + * code uses... 
+ */ + R600_STATECHANGE(context, vpt); + r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; + r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty; + } radeonUpdateScissor(ctx); } @@ -161,6 +168,10 @@ void r700UpdateViewportOffset(GLcontext * ctx) //------------------ void r700UpdateDrawBuffer(GLcontext * ctx) /* TODO */ //--------------------- { context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + + R600_STATECHANGE(context, cb_target); + R600_STATECHANGE(context, db_target); r700SetRenderTarget(context, 0); r700SetDepthTarget(context); @@ -233,6 +244,9 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- r700UpdateStateParameters(ctx, new_state); + R600_STATECHANGE(context, cl); + R600_STATECHANGE(context, spi); + if(GL_TRUE == r700->bEnablePerspective) { /* Do scale XY and Z by 1/W0 for perspective correction on pos. For orthogonal case, set both to one. */ @@ -256,14 +270,15 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); } - context->radeon.NewGLState |= new_state; + context->radeon.NewGLState |= new_state; } static void r700SetDepthState(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, db); if (ctx->Depth.Test) { @@ -331,6 +346,8 @@ static void r700SetAlphaState(GLcontext * ctx) uint32_t alpha_func = REF_ALWAYS; GLboolean really_enabled = ctx->Color.AlphaEnabled; + R600_STATECHANGE(context, sx); + switch (ctx->Color.AlphaFunc) { case GL_NEVER: alpha_func = REF_NEVER; @@ -383,6 +400,8 @@ static void r700BlendColor(GLcontext * ctx, const GLfloat cf[4]) //------------- context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, cb); + 
r700->CB_BLEND_RED.f32All = cf[0]; r700->CB_BLEND_GREEN.f32All = cf[1]; r700->CB_BLEND_BLUE.f32All = cf[2]; @@ -451,6 +470,8 @@ static void r700SetBlendState(GLcontext * ctx) int id = 0; uint32_t blend_reg = 0, eqn, eqnA; + R600_STATECHANGE(context, cb); + if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { SETfield(blend_reg, BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); @@ -637,8 +658,11 @@ static GLuint translate_logicop(GLenum logicop) */ static void r700SetLogicOpState(GLcontext *ctx) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + R600_STATECHANGE(context, cb); + if (RGBA_LOGICOP_ENABLED(ctx)) SETfield(r700->CB_COLOR_CONTROL.u32All, translate_logicop(ctx->Color.LogicOp), ROP3_shift, ROP3_mask); @@ -658,7 +682,10 @@ static void r700LogicOpcode(GLcontext *ctx, GLenum logicop) static void r700UpdateCulling(GLcontext * ctx) { - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + + R600_STATECHANGE(context, su); CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); @@ -703,7 +730,11 @@ static void r700UpdateCulling(GLcontext * ctx) static void r700UpdateLineStipple(GLcontext * ctx) { - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + + R600_STATECHANGE(context, sc); + if (ctx->Line.StippleFlag) { SETbit(r700->PA_SC_MODE_CNTL.u32All, LINE_STIPPLE_ENABLE_bit); @@ -778,14 +809,17 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //--------- static void r700ColorMask(GLcontext * ctx, GLboolean r, GLboolean g, GLboolean b, GLboolean a) //------------------ { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = 
(R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); unsigned int mask = ((r ? 1 : 0) | (g ? 2 : 0) | (b ? 4 : 0) | (a ? 8 : 0)); - if (mask != r700->CB_SHADER_MASK.u32All) + if (mask != r700->CB_SHADER_MASK.u32All) { + R600_STATECHANGE(context, cb); SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask); + } } /** @@ -841,6 +875,8 @@ static void r700ShadeModel(GLcontext * ctx, GLenum mode) //-------------------- context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, spi); + /* also need to set/clear FLAT_SHADE bit per param in SPI_PS_INPUT_CNTL_[0-31] */ switch (mode) { case GL_FLAT: @@ -862,6 +898,8 @@ static void r700PointSize(GLcontext * ctx, GLfloat size) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, su); + /* We need to clamp to user defined range here, because * the HW clamping happens only for per vertex point size. 
*/ size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize); @@ -882,6 +920,8 @@ static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, su); + /* format is 12.4 fixed point */ switch (pname) { case GL_POINT_SIZE_MIN: @@ -966,6 +1006,7 @@ static void r700SetStencilState(GLcontext * ctx, GLboolean state) } if (hw_stencil) { + R600_STATECHANGE(context, db); if (state) SETbit(r700->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit); else @@ -983,6 +1024,8 @@ static void r700StencilFuncSeparate(GLcontext * ctx, GLenum face, //fixme //r300CatchStencilFallback(ctx); + R600_STATECHANGE(context, db); + //front SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.Ref[0], STENCILREF_shift, STENCILREF_mask); @@ -1012,6 +1055,8 @@ static void r700StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) / //fixme //r300CatchStencilFallback(ctx); + R600_STATECHANGE(context, db); + // front SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.WriteMask[0], STENCILWRITEMASK_shift, STENCILWRITEMASK_mask); @@ -1032,6 +1077,8 @@ static void r700StencilOpSeparate(GLcontext * ctx, GLenum face, //fixme //r300CatchStencilFallback(ctx); + R600_STATECHANGE(context, db); + SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.FailFunc[0]), STENCILFAIL_shift, STENCILFAIL_mask); SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.ZFailFunc[0]), @@ -1074,7 +1121,7 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //-------------------- GLfloat sz = v[MAT_SZ] * depthScale; GLfloat tz = v[MAT_TZ] * depthScale; - /* TODO : Need DMA flush as well. 
*/ + R600_STATECHANGE(context, vpt); r700->viewport[id].PA_CL_VPORT_XSCALE.f32All = sx; r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; @@ -1112,10 +1159,13 @@ static void r700LineWidth(GLcontext * ctx, GLfloat widthf) //--------------- context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); uint32_t lineWidth = (uint32_t)((widthf * 0.5) * (1 << 4)); + + R600_STATECHANGE(context, su); + if (lineWidth > 0xFFFF) - lineWidth = 0xFFFF; + lineWidth = 0xFFFF; SETfield(r700->PA_SU_LINE_CNTL.u32All,(uint16_t)lineWidth, - PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask); + PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask); } static void r700LineStipple(GLcontext *ctx, GLint factor, GLushort pattern) @@ -1123,6 +1173,8 @@ static void r700LineStipple(GLcontext *ctx, GLint factor, GLushort pattern) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, sc); + SETfield(r700->PA_SC_LINE_STIPPLE.u32All, pattern, LINE_PATTERN_shift, LINE_PATTERN_mask); SETfield(r700->PA_SC_LINE_STIPPLE.u32All, (factor-1), REPEAT_COUNT_shift, REPEAT_COUNT_mask); SETfield(r700->PA_SC_LINE_STIPPLE.u32All, 1, AUTO_RESET_CNTL_shift, AUTO_RESET_CNTL_mask); @@ -1133,6 +1185,8 @@ static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, su); + if (state) { SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit); SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit); @@ -1161,6 +1215,8 @@ static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) // factor *= 12.0; + R600_STATECHANGE(context, su); + r700->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor; r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant; r700->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor; @@ -1172,6 +1228,8 
@@ static void r700UpdatePolygonMode(GLcontext * ctx) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, su); + SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DISABLE_POLY_MODE, POLY_MODE_shift, POLY_MODE_mask); /* Only do something if a polygon mode is wanted, default is GL_FILL */ @@ -1247,6 +1305,8 @@ static void r700ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) p = (GLint) plane - (GLint) GL_CLIP_PLANE0; ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + R600_STATECHANGE(context, ucp); + r700->ucp[p].PA_CL_UCP_0_X.u32All = ip[0]; r700->ucp[p].PA_CL_UCP_0_Y.u32All = ip[1]; r700->ucp[p].PA_CL_UCP_0_Z.u32All = ip[2]; @@ -1260,6 +1320,9 @@ static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state) GLuint p; p = cap - GL_CLIP_PLANE0; + + R600_STATECHANGE(context, cl); + if (state) { r700->PA_CL_CLIP_CNTL.u32All |= (UCP_ENA_0_bit << p); r700->ucp[p].enabled = GL_TRUE; @@ -1293,6 +1356,8 @@ void r700SetScissor(context_t *context) //--------------- y2 = rrb->dPriv->y + rrb->dPriv->h; } + R600_STATECHANGE(context, sc); + /* window */ SETbit(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, x1, @@ -1361,6 +1426,9 @@ static void r700SetRenderTarget(context_t *context, int id) return; } + R600_STATECHANGE(context, cb_target); + R600_STATECHANGE(context, cb); + /* screen/window/view */ SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask); @@ -1407,6 +1475,8 @@ static void r700SetDepthTarget(context_t *context) if (!rrb) return; + R600_STATECHANGE(context, db_target); + /* depth buf */ r700->DB_DEPTH_SIZE.u32All = 0; r700->DB_DEPTH_BASE.u32All = 0; @@ -1467,6 +1537,8 @@ static void r700InitSQConfig(GLcontext * ctx) int num_gs_stack_entries; int num_es_stack_entries; + R600_STATECHANGE(context, sq); + // SQ ps_prio = 0; vs_prio = 1; diff --git 
a/src/mesa/drivers/dri/r600/radeon_queryobj.c b/src/mesa/drivers/dri/r600/radeon_queryobj.c new file mode 120000 index 0000000000..1d6ebc1c48 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_queryobj.c @@ -0,0 +1 @@ +../radeon/radeon_queryobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_queryobj.h b/src/mesa/drivers/dri/r600/radeon_queryobj.h new file mode 120000 index 0000000000..8f6f842b0a --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_queryobj.h @@ -0,0 +1 @@ +../radeon/radeon_queryobj.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile index b59ad68f44..6bf67d2ea5 100644 --- a/src/mesa/drivers/dri/radeon/Makefile +++ b/src/mesa/drivers/dri/radeon/Makefile @@ -24,7 +24,8 @@ RADEON_COMMON_SOURCES = \ radeon_cs_legacy.c \ radeon_mipmap_tree.c \ radeon_span.c \ - radeon_fbo.c + radeon_fbo.c \ + radeon_queryobj.c DRIVER_SOURCES = \ radeon_context.c \ diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index 5575da6971..b1cc155f71 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -235,8 +235,9 @@ static int legacy_wait_pending(struct radeon_bo *bo) return 0; } -static void legacy_track_pending(struct bo_manager_legacy *boml, int debug) +void legacy_track_pending(struct radeon_bo_manager *bom, int debug) { + struct bo_manager_legacy *boml = (struct bo_manager_legacy*) bom; struct bo_legacy *bo_legacy; struct bo_legacy *next; @@ -244,8 +245,8 @@ static void legacy_track_pending(struct bo_manager_legacy *boml, int debug) bo_legacy = boml->pending_bos.pnext; while (bo_legacy) { if (debug) - fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size, - boml->current_age, bo_legacy->pending); + fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size, + boml->current_age, bo_legacy->pending); next = bo_legacy->pnext; if (legacy_is_pending(&(bo_legacy->base))) { } @@ -444,7 +445,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) { retry: - legacy_track_pending(boml, 0); + legacy_track_pending(&boml->base, 0); /* dma buffers */ r = bo_dma_alloc(&(bo_legacy->base)); @@ -580,7 +581,7 @@ static int bo_vram_validate(struct radeon_bo *bo, if (r) { pending_retry = 0; while(boml->cpendings && pending_retry++ < 10000) { - legacy_track_pending(boml, 0); + 
legacy_track_pending(&boml->base, 0); retry_count++; if (retry_count > 2) { free(bo_legacy->tobj); @@ -706,7 +707,7 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo, r = bo_vram_validate(bo, soffset, eoffset); if (r) { - legacy_track_pending(boml, 0); + legacy_track_pending(&boml->base, 0); legacy_kick_all_buffers(boml); retries++; if (retries == 2) { diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h index 455adebc09..2cf15dfaff 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h @@ -45,5 +45,6 @@ unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo); struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, int size, uint32_t offset); +void legacy_track_pending(struct radeon_bo_manager *bom, int debug); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 0614c89459..b5b4fed8fa 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -83,6 +83,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_lock.h" #include "radeon_drm.h" #include "radeon_mipmap_tree.h" +#include "radeon_queryobj.h" #define DEBUG_CMDBUF 0 @@ -1042,7 +1043,7 @@ void radeonFlush(GLcontext *ctx) we have no DMA buffer allocated. then no point flushing anything at all. 
*/ - if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && !radeon->dma.current) + if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && is_empty_list(&radeon->dma.reserved)) return; if (radeon->dma.flush) @@ -1072,6 +1073,9 @@ void radeonFlush(GLcontext *ctx) } } } + + make_empty_list(&radeon->query.not_flushed_head); + } /* Make sure all commands have been sent to the hardware and have @@ -1128,6 +1132,8 @@ int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller) __FUNCTION__, caller, rmesa->numClipRects); } + radeonEmitQueryEnd(rmesa->glCtx); + if (rmesa->cmdbuf.cs->cdw) { ret = radeon_cs_emit(rmesa->cmdbuf.cs); rmesa->hw.all_dirty = GL_TRUE; @@ -1146,7 +1152,7 @@ int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller) { int ret; - radeonReleaseDmaRegion(rmesa); + radeonReleaseDmaRegions(rmesa); LOCK_HARDWARE(rmesa); ret = rcommonFlushCmdBufLocked(rmesa, caller); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index c0abcbfa21..ad4584a2bd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -263,6 +263,9 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->texture_compressed_row_align = 64; } + make_empty_list(&radeon->query.not_flushed_head); + radeon_init_dma(radeon); + return GL_TRUE; } @@ -295,10 +298,6 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) GET_CURRENT_CONTEXT(ctx); radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; radeonContextPtr current = ctx ? 
RADEON_CONTEXT(ctx) : NULL; -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); -#endif if (radeon == current) { radeon_firevertices(radeon); @@ -307,10 +306,11 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) assert(radeon); if (radeon) { - if (radeon->dma.current) { + if (!is_empty_list(&radeon->dma.reserved)) { rcommonFlushCmdBuf( radeon, __FUNCTION__ ); } + radeonFreeDmaRegions(radeon); radeonReleaseArrays(radeon->glCtx, ~0); meta_destroy_metaops(&radeon->meta); if (radeon->vtbl.free_context) @@ -334,9 +334,6 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) rcommonDestroyCmdBuf(radeon); -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ - if (!IS_R600_CLASS(screen)) -#endif radeon_destroy_atom_list(radeon); if (radeon->state.scissor.pClipRects) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index ee46c1f81a..9e9c35650d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -18,6 +18,22 @@ struct radeon_context; #include "radeon_bocs_wrapper.h" +/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . + I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble + with other compilers ... GLUE! +*/ +#define WARN_ONCE(a, ...) 
{ \ + static int warn##__LINE__=1; \ + if(warn##__LINE__){ \ + fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \ + fprintf(stderr, "File %s function %s line %d\n", \ + __FILE__, __FUNCTION__, __LINE__); \ + fprintf(stderr, a, ## __VA_ARGS__);\ + fprintf(stderr, "***************************************************************************\n"); \ + warn##__LINE__=0;\ + } \ + } + /* This union is used to avoid warnings/miscompilation with float to uint32_t casts due to strict-aliasing */ typedef union { GLfloat f; uint32_t ui32; } float_ui32_type; @@ -163,6 +179,7 @@ struct radeon_hw_state { /* Head of the linked list of state atoms. */ struct radeon_state_atom atomlist; int max_state_size; /* Number of bytes necessary for a full state emit. */ + int max_post_flush_size; /* Number of bytes necessary for post flushing emits */ GLboolean is_dirty, all_dirty; }; @@ -254,6 +271,17 @@ static INLINE radeonTexObj* radeon_tex_obj(struct gl_texture_object *texObj) return (radeonTexObj*)texObj; } +/* occlusion query */ +struct radeon_query_object { + struct gl_query_object Base; + struct radeon_bo *bo; + int curr_offset; + GLboolean emitted_begin; + + /* Double linked list of not flushed query objects */ + struct radeon_query_object *prev, *next; +}; + /* Need refcounting on dma buffers: */ struct radeon_dma_buffer { @@ -269,14 +297,25 @@ struct radeon_aos { int count; /** Number of vertices */ }; +#define DMA_BO_FREE_TIME 100 + +struct radeon_dma_bo { + struct radeon_dma_bo *next, *prev; + struct radeon_bo *bo; + int expire_counter; +}; + struct radeon_dma { /* Active dma region. Allocations for vertices and retained * regions come from here. 
Also used for emitting random vertices, * these may be flushed by calling flush_current(); */ - struct radeon_bo *current; /** Buffer that DMA memory is allocated from */ - int current_used; /** Number of bytes allocated and forgotten about */ - int current_vertexptr; /** End of active vertex region */ + struct radeon_dma_bo free; + struct radeon_dma_bo wait; + struct radeon_dma_bo reserved; + size_t current_used; /** Number of bytes allocated and forgotten about */ + size_t current_vertexptr; /** End of active vertex region */ + size_t minimum_size; /** * If current_vertexptr != current_used then flush must be non-zero. @@ -284,12 +323,6 @@ struct radeon_dma { * performed. */ void (*flush) (GLcontext *); - - /* Number of "in-flight" DMA buffers, i.e. the number of buffers - * for which a DISCARD command is currently queued in the command buffer -. - */ - GLuint nr_released_bufs; }; /* radeon_swtcl.c @@ -500,6 +533,12 @@ struct radeon_context { struct dri_metaops meta; struct { + struct radeon_query_object *current; + struct radeon_query_object not_flushed_head; + struct radeon_state_atom queryobj; + } query; + + struct { void (*get_lock)(radeonContextPtr radeon); void (*update_viewport_offset)(GLcontext *ctx); void (*emit_cs_header)(struct radeon_cs *cs, radeonContextPtr rmesa); @@ -508,6 +547,7 @@ struct radeon_context { void (*pre_emit_state)(radeonContextPtr rmesa); void (*fallback)(GLcontext *ctx, GLuint bit, GLboolean mode); void (*free_context)(GLcontext *ctx); + void (*emit_query_finish)(radeonContextPtr radeon); } vtbl; }; @@ -523,7 +563,6 @@ static inline __DRIdrawablePrivate* radeon_get_readable(radeonContextPtr radeon) return radeon->dri.context->driReadablePriv; } - /** * This function takes a float and packs it into a uint32_t */ diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index 5e755c51ed..7e6b74add8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ 
b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -31,6 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ #include "radeon_common.h" +#include "main/simple_list.h" #if defined(USE_X86_ASM) #define COPY_DWORDS( dst, src, nr ) \ @@ -161,10 +162,20 @@ void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, } } -void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) +void radeon_init_dma(radeonContextPtr rmesa) { + make_empty_list(&rmesa->dma.free); + make_empty_list(&rmesa->dma.wait); + make_empty_list(&rmesa->dma.reserved); + rmesa->dma.minimum_size = MAX_DMA_BUF_SZ; +} - size = MAX2(size, MAX_DMA_BUF_SZ); +void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) +{ + /* we set minimum sizes to at least requested size + aligned to next 16 bytes. */ + if (size > rmesa->dma.minimum_size) + rmesa->dma.minimum_size = (size + 15) & (~15); if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) fprintf(stderr, "%s\n", __FUNCTION__); @@ -173,43 +184,49 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) rmesa->dma.flush(rmesa->glCtx); } - if (rmesa->dma.nr_released_bufs > 4) { - rcommonFlushCmdBuf(rmesa, __FUNCTION__); - rmesa->dma.nr_released_bufs = 0; - } + /* unmap old reserved bo */ + if (!is_empty_list(&rmesa->dma.reserved)) + radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); - if (rmesa->dma.current) { - radeon_bo_unmap(rmesa->dma.current); - radeon_bo_unref(rmesa->dma.current); - rmesa->dma.current = 0; - } + if (is_empty_list(&rmesa->dma.free) + || last_elem(&rmesa->dma.free)->bo->size < size) { + struct radeon_dma_bo *dma_bo = CALLOC(sizeof(struct radeon_dma_bo)); + assert(dma_bo); again_alloc: - rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, - 0, size, 4, RADEON_GEM_DOMAIN_GTT, - 0); + dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom, + 0, rmesa->dma.minimum_size, 4, + RADEON_GEM_DOMAIN_GTT, 0); - if (!rmesa->dma.current) { - 
rcommonFlushCmdBuf(rmesa, __FUNCTION__); - rmesa->dma.nr_released_bufs = 0; - goto again_alloc; + if (!dma_bo->bo) { + rcommonFlushCmdBuf(rmesa, __FUNCTION__); + goto again_alloc; + } + insert_at_head(&rmesa->dma.reserved, dma_bo); + } else { + /* We push and pop buffers from end of list so we can keep + counter on unused buffers for later freeing them from + begin of list */ + struct radeon_dma_bo *dma_bo = last_elem(&rmesa->dma.free); + assert(dma_bo->bo->cref == 1); + remove_from_list(dma_bo); + insert_at_head(&rmesa->dma.reserved, dma_bo); } rmesa->dma.current_used = 0; rmesa->dma.current_vertexptr = 0; if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs, - rmesa->dma.current, + first_elem(&rmesa->dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0)) fprintf(stderr,"failure to revalidate BOs - badness\n"); - if (!rmesa->dma.current) { + if (is_empty_list(&rmesa->dma.reserved)) { /* Cmd buff have been flushed in radeon_revalidate_bos */ - rmesa->dma.nr_released_bufs = 0; goto again_alloc; } - radeon_bo_map(rmesa->dma.current, 1); + radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1); } /* Allocates a region from rmesa->dma.current. 
If there isn't enough @@ -230,30 +247,142 @@ void radeonAllocDmaRegion(radeonContextPtr rmesa, alignment--; rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; - if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) - radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); + if (is_empty_list(&rmesa->dma.reserved) + || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size) + radeonRefillCurrentDmaRegion(rmesa, bytes); *poffset = rmesa->dma.current_used; - *pbo = rmesa->dma.current; + *pbo = first_elem(&rmesa->dma.reserved)->bo; radeon_bo_ref(*pbo); /* Always align to at least 16 bytes */ rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; rmesa->dma.current_vertexptr = rmesa->dma.current_used; - assert(rmesa->dma.current_used <= rmesa->dma.current->size); + assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size); } -void radeonReleaseDmaRegion(radeonContextPtr rmesa) +void radeonFreeDmaRegions(radeonContextPtr rmesa) { + struct radeon_dma_bo *dma_bo; + struct radeon_dma_bo *temp; + if (RADEON_DEBUG & DEBUG_DMA) + fprintf(stderr, "%s\n", __FUNCTION__); + + foreach_s(dma_bo, temp, &rmesa->dma.free) { + remove_from_list(dma_bo); + radeon_bo_unref(dma_bo->bo); + FREE(dma_bo); + } + + foreach_s(dma_bo, temp, &rmesa->dma.wait) { + remove_from_list(dma_bo); + radeon_bo_unref(dma_bo->bo); + FREE(dma_bo); + } + + foreach_s(dma_bo, temp, &rmesa->dma.reserved) { + remove_from_list(dma_bo); + radeon_bo_unmap(dma_bo->bo); + radeon_bo_unref(dma_bo->bo); + FREE(dma_bo); + } +} + +void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes) +{ + if (is_empty_list(&rmesa->dma.reserved)) + return; + if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current); - if (rmesa->dma.current) { - rmesa->dma.nr_released_bufs++; - radeon_bo_unmap(rmesa->dma.current); - radeon_bo_unref(rmesa->dma.current); + fprintf(stderr, "%s 
%d\n", __FUNCTION__, return_bytes); + rmesa->dma.current_used -= return_bytes; + rmesa->dma.current_vertexptr = rmesa->dma.current_used; +} + +static int radeon_bo_is_idle(struct radeon_bo* bo) +{ + return bo->cref == 1; +} + +void radeonReleaseDmaRegions(radeonContextPtr rmesa) +{ + struct radeon_dma_bo *dma_bo; + struct radeon_dma_bo *temp; + const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME; + const int time = rmesa->dma.free.expire_counter; + + if (RADEON_DEBUG & DEBUG_DMA) { + size_t free = 0, + wait = 0, + reserved = 0; + foreach(dma_bo, &rmesa->dma.free) + ++free; + + foreach(dma_bo, &rmesa->dma.wait) + ++wait; + + foreach(dma_bo, &rmesa->dma.reserved) + ++reserved; + + fprintf(stderr, "%s: free %u, wait %u, reserved %u, minimum_size: %u\n", + __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size); + } + + if (!rmesa->radeonScreen->driScreen->dri2.enabled) { + /* request updated cs processing information from kernel */ + legacy_track_pending(rmesa->radeonScreen->bom, 0); + } + /* move waiting bos to free list. 
+ wait list provides gpu time to handle data before reuse */ + foreach_s(dma_bo, temp, &rmesa->dma.wait) { + if (dma_bo->expire_counter == time) { + WARN_ONCE("Leaking dma buffer object!\n"); + radeon_bo_unref(dma_bo->bo); + remove_from_list(dma_bo); + FREE(dma_bo); + continue; + } + /* free objects that are too small to be used because of large request */ + if (dma_bo->bo->size < rmesa->dma.minimum_size) { + radeon_bo_unref(dma_bo->bo); + remove_from_list(dma_bo); + FREE(dma_bo); + continue; + } + if (!radeon_bo_is_idle(dma_bo->bo)) + continue; + remove_from_list(dma_bo); + dma_bo->expire_counter = expire_at; + insert_at_tail(&rmesa->dma.free, dma_bo); + } + + /* unmap the last dma region */ + if (!is_empty_list(&rmesa->dma.reserved)) + radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); + /* move reserved to wait list */ + foreach_s(dma_bo, temp, &rmesa->dma.reserved) { + /* free objects that are too small to be used because of large request */ + if (dma_bo->bo->size < rmesa->dma.minimum_size) { + radeon_bo_unref(dma_bo->bo); + remove_from_list(dma_bo); + FREE(dma_bo); + continue; + } + remove_from_list(dma_bo); + dma_bo->expire_counter = expire_at; + insert_at_tail(&rmesa->dma.wait, dma_bo); + } + + /* free bos that have been unused for some time */ + foreach_s(dma_bo, temp, &rmesa->dma.free) { + if (dma_bo->expire_counter != time) + break; + remove_from_list(dma_bo); + radeon_bo_unref(dma_bo->bo); + FREE(dma_bo); } - rmesa->dma.current = NULL; + } @@ -266,10 +395,10 @@ void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current); + fprintf(stderr, "%s\n", __FUNCTION__); dma->flush = NULL; - if (dma->current) { + if (!is_empty_list(&dma->reserved)) { GLuint current_offset = dma->current_used; assert (dma->current_used + @@ -292,7 +421,10 @@ rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) GLuint bytes = vsize * nverts; void *head; restart: - if 
(!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) { + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + if (is_empty_list(&rmesa->dma.reserved) + || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) { radeonRefillCurrentDmaRegion(rmesa, bytes); } @@ -302,7 +434,7 @@ restart: rmesa->hw.max_state_size + (20*sizeof(int)), __FUNCTION__); /* if cmdbuf flushed DMA restart */ - if (!rmesa->dma.current) + if (is_empty_list(&rmesa->dma.reserved)) goto restart; rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; rmesa->dma.flush = rcommon_flush_last_swtcl_prim; @@ -314,7 +446,7 @@ restart: rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == rmesa->dma.current_vertexptr ); - head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr); + head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr); rmesa->dma.current_vertexptr += bytes; rmesa->swtcl.numverts += nverts; return head; @@ -324,18 +456,17 @@ void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) { radeonContextPtr radeon = RADEON_CONTEXT( ctx ); int i; + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); if (radeon->dma.flush) { radeon->dma.flush(radeon->glCtx); } - if (radeon->tcl.elt_dma_bo) { - radeon_bo_unref(radeon->tcl.elt_dma_bo); - radeon->tcl.elt_dma_bo = NULL; - } for (i = 0; i < radeon->tcl.aos_count; i++) { if (radeon->tcl.aos[i].bo) { radeon_bo_unref(radeon->tcl.aos[i].bo); radeon->tcl.aos[i].bo = NULL; + } } } diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h index 55509ed00c..74e653fd18 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.h +++ b/src/mesa/drivers/dri/radeon/radeon_dma.h @@ -41,14 +41,18 @@ void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count); void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, const GLvoid * data, int size, int stride, int 
count); +void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes); void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size); +void radeon_init_dma(radeonContextPtr rmesa); +void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes); void radeonAllocDmaRegion(radeonContextPtr rmesa, struct radeon_bo **pbo, int *poffset, int bytes, int alignment); -void radeonReleaseDmaRegion(radeonContextPtr rmesa); +void radeonReleaseDmaRegions(radeonContextPtr rmesa); void rcommon_flush_last_swtcl_prim(GLcontext *ctx); void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize); +void radeonFreeDmaRegions(radeonContextPtr rmesa); void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.c b/src/mesa/drivers/dri/radeon/radeon_queryobj.c new file mode 100644 index 0000000000..70251946df --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.c @@ -0,0 +1,216 @@ +/* + * Copyright © 2008-2009 Maciej Cencora <m.cencora@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Maciej Cencora <m.cencora@gmail.com> + * + */ +#include "radeon_common.h" +#include "radeon_queryobj.h" + +#include "main/imports.h" +#include "main/simple_list.h" + +#define DDEBUG 0 + +#define PAGE_SIZE 4096 + +static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q) +{ + struct radeon_query_object *query = (struct radeon_query_object *)q; + uint32_t *result; + int i; + + if (DDEBUG) fprintf(stderr, "%s: query id %d, result %d\n", __FUNCTION__, query->Base.Id, (int) query->Base.Result); + + radeon_bo_map(query->bo, GL_FALSE); + + result = query->bo->ptr; + + query->Base.Result = 0; + for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) { + query->Base.Result += result[i]; + if (DDEBUG) fprintf(stderr, "result[%d] = %d\n", i, result[i]); + } + + radeon_bo_unmap(query->bo); +} + +static struct gl_query_object * radeonNewQueryObject(GLcontext *ctx, GLuint id) +{ + struct radeon_query_object *query; + + query = _mesa_calloc(sizeof(struct radeon_query_object)); + + query->Base.Id = id; + query->Base.Result = 0; + query->Base.Active = GL_FALSE; + query->Base.Ready = GL_TRUE; + + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, query->Base.Id); + + return &query->Base; +} + +static void radeonDeleteQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct radeon_query_object *query = (struct radeon_query_object *)q; + + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); + + if (query->bo) { + radeon_bo_unref(query->bo); + } + + _mesa_free(query); +} + +static void radeonWaitQuery(GLcontext *ctx, struct gl_query_object *q) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *tmp, *query = 
(struct radeon_query_object *)q; + + /* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */ + { + GLboolean found = GL_FALSE; + foreach(tmp, &radeon->query.not_flushed_head) { + if (tmp == query) { + found = GL_TRUE; + break; + } + } + + if (found) + ctx->Driver.Flush(ctx); + } + + if (DDEBUG) fprintf(stderr, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset); + + radeonQueryGetResult(ctx, q); + + query->Base.Ready = GL_TRUE; +} + + +static void radeonBeginQuery(GLcontext *ctx, struct gl_query_object *q) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = (struct radeon_query_object *)q; + + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); + + assert(radeon->query.current == NULL); + + if (radeon->dma.flush) + radeon->dma.flush(radeon->glCtx); + + if (!query->bo) { + query->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, PAGE_SIZE, PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0); + } + query->curr_offset = 0; + + radeon->query.current = query; + + radeon->query.queryobj.dirty = GL_TRUE; + insert_at_tail(&radeon->query.not_flushed_head, query); + +} + +void radeonEmitQueryEnd(GLcontext *ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + + if (!query) + return; + + if (query->emitted_begin == GL_FALSE) + return; + + if (DDEBUG) fprintf(stderr, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, query->Base.Id, query->bo, query->curr_offset); + + radeon_cs_space_check_with_bo(radeon->cmdbuf.cs, + query->bo, + 0, RADEON_GEM_DOMAIN_GTT); + + radeon->vtbl.emit_query_finish(radeon); +} + +static void radeonEndQuery(GLcontext *ctx, struct gl_query_object *q) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); + + if (radeon->dma.flush) + radeon->dma.flush(radeon->glCtx); + radeonEmitQueryEnd(ctx); + + 
radeon->query.current = NULL; +} + +/** + * TODO: + * should check if bo is idle, bo there's no interface to do it + * just wait for result now + */ +static void radeonCheckQuery(GLcontext *ctx, struct gl_query_object *q) +{ + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); + + radeonWaitQuery(ctx, q); +} + +void radeonInitQueryObjFunctions(struct dd_function_table *functions) +{ + functions->NewQueryObject = radeonNewQueryObject; + functions->DeleteQuery = radeonDeleteQuery; + functions->BeginQuery = radeonBeginQuery; + functions->EndQuery = radeonEndQuery; + functions->CheckQuery = radeonCheckQuery; + functions->WaitQuery = radeonWaitQuery; +} + +int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + + if (!query || query->emitted_begin) + return 0; + return atom->cmd_size; +} + +void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + BATCH_LOCALS(radeon); + int dwords; + + dwords = (*atom->check) (ctx, atom); + + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(atom->cmd, dwords); + END_BATCH(); + + radeon->query.current->emitted_begin = GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r300/r300_queryobj.h b/src/mesa/drivers/dri/radeon/radeon_queryobj.h index f301f0b113..19374dc76b 100644 --- a/src/mesa/drivers/dri/r300/r300_queryobj.h +++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.h @@ -26,9 +26,30 @@ */ #include "main/imports.h" -#include "r300_context.h" +#include "main/simple_list.h" +#include "radeon_common_context.h" -extern void r300EmitQueryBegin(GLcontext *ctx); -extern void r300EmitQueryEnd(GLcontext *ctx); +extern void radeonEmitQueryBegin(GLcontext *ctx); +extern void radeonEmitQueryEnd(GLcontext *ctx); + +extern void radeonInitQueryObjFunctions(struct dd_function_table *functions); + +#define RADEON_QUERY_PAGE_SIZE 
4096 + +int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom); +void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom); + +static inline void radeon_init_query_stateobj(radeonContextPtr radeon, int SZ) +{ + radeon->query.queryobj.cmd_size = (SZ); + radeon->query.queryobj.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t)); + radeon->query.queryobj.name = "queryobj"; + radeon->query.queryobj.idx = 0; + radeon->query.queryobj.check = radeon_check_query_active; + radeon->query.queryobj.dirty = GL_FALSE; + radeon->query.queryobj.emit = radeon_emit_queryobj; + + radeon->hw.max_state_size += (SZ); + insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj); +} -extern void r300InitQueryObjFunctions(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index c8d491621a..e28543d855 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1797,7 +1797,7 @@ const struct __DriverAPIRec driDriverAPI = { .DestroyContext = r200DestroyContext, #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) .CreateContext = r600CreateContext, - .DestroyContext = r600DestroyContext, + .DestroyContext = radeonDestroyContext, #else .CreateContext = radeonCreateContext, .DestroyContext = radeonDestroyContext, diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index 0d1728b747..56f82bdb0b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/state.h" #include "main/context.h" #include "main/framebuffer.h" +#include "main/simple_list.h" #include "vbo/vbo.h" #include "tnl/tnl.h" @@ -2099,7 +2100,7 @@ static GLboolean r100ValidateBuffers(GLcontext *ctx) RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; return GL_TRUE; diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index 57aa7f1ca4..501ea0b66b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -798,7 +798,7 @@ void radeonInitState( r100ContextPtr rmesa ) rmesa->hw.glt.emit = vec_emit; rmesa->hw.eye.emit = vec_emit; - for (i = 0; i <= 6; i++) + for (i = 0; i < 6; i++) rmesa->hw.mat[i].emit = vec_emit; for (i = 0; i < 8; i++) diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index e31f045991..58b3be9391 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/enums.h" #include "main/imports.h" #include "main/macros.h" +#include "main/simple_list.h" #include "swrast_setup/swrast_setup.h" #include "math/m_translate.h" @@ -291,7 +292,7 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset) radeonEmitState(&rmesa->radeon); radeonEmitVertexAOS( rmesa, rmesa->radeon.swtcl.vertex_size, - rmesa->radeon.dma.current, + first_elem(&rmesa->radeon.dma.reserved)->bo, current_offset); diff --git a/src/mesa/main/drawpix.c b/src/mesa/main/drawpix.c index 6d31f32443..67311f71a2 100644 --- a/src/mesa/main/drawpix.c +++ b/src/mesa/main/drawpix.c @@ -152,7 +152,14 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, GLsizei height, return; } - if (type != GL_COLOR && type != GL_DEPTH && type != GL_STENCIL) { + /* Note: more detailed 'type' checking is done by the + * _mesa_source/dest_buffer_exists() calls below. That's where we + * check if the stencil buffer exists, etc. + */ + if (type != GL_COLOR && + type != GL_DEPTH && + type != GL_STENCIL && + type != GL_DEPTH_STENCIL) { _mesa_error(ctx, GL_INVALID_ENUM, "glCopyPixels(type=%s)", _mesa_lookup_enum_by_nr(type)); return; diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index 80dde4b5aa..8e21a27f89 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -1306,7 +1306,9 @@ static void build_fog( struct tnl_program *p ) input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); } + /* result.fog = {abs(f),0,0,1}; */ emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); + emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); } diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 79f06a3c40..d0c9cea00c 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -276,6 +276,7 @@ _mesa_GetBooleanv( GLenum pname, GLboolean *params ) case GL_CURRENT_TEXTURE_COORDS: { const GLuint texUnit = ctx->Texture.CurrentUnit; + FLUSH_CURRENT(ctx, 0); params[0] = 
FLOAT_TO_BOOLEAN(ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][0]); params[1] = FLOAT_TO_BOOLEAN(ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][1]); params[2] = FLOAT_TO_BOOLEAN(ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][2]); @@ -2102,6 +2103,7 @@ _mesa_GetFloatv( GLenum pname, GLfloat *params ) case GL_CURRENT_TEXTURE_COORDS: { const GLuint texUnit = ctx->Texture.CurrentUnit; + FLUSH_CURRENT(ctx, 0); params[0] = ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][0]; params[1] = ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][1]; params[2] = ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][2]; @@ -3928,6 +3930,7 @@ _mesa_GetIntegerv( GLenum pname, GLint *params ) case GL_CURRENT_TEXTURE_COORDS: { const GLuint texUnit = ctx->Texture.CurrentUnit; + FLUSH_CURRENT(ctx, 0); params[0] = IROUND(ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][0]); params[1] = IROUND(ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][1]); params[2] = IROUND(ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][2]); diff --git a/src/mesa/main/get_gen.py b/src/mesa/main/get_gen.py index e9c8226d08..97dc785020 100644 --- a/src/mesa/main/get_gen.py +++ b/src/mesa/main/get_gen.py @@ -176,7 +176,8 @@ StateVars = [ "ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][1]", "ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][2]", "ctx->Current.Attrib[VERT_ATTRIB_TEX0 + texUnit][3]"], - "const GLuint texUnit = ctx->Texture.CurrentUnit;", None ), + """const GLuint texUnit = ctx->Texture.CurrentUnit; + FLUSH_CURRENT(ctx, 0);""", None ), ( "GL_DEPTH_BIAS", GLfloat, ["ctx->Pixel.DepthBias"], "", None ), ( "GL_DEPTH_BITS", GLint, ["ctx->DrawBuffer->Visual.depthBits"], "", None ), diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index d58803991a..bbc2830e69 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -240,7 +240,7 @@ st_surface_data(struct pipe_context *pipe, struct pipe_screen *screen = pipe->screen; void *map = 
screen->transfer_map(screen, dst); - pipe_copy_rect(map, + util_copy_rect(map, &dst->block, dst->stride, dstx, dsty, |