diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_cmdbuf.c | 316 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_cmdbuf.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_draw.c | 171 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_ioctl.c | 26 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_render.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_vertprog.c | 3 |
9 files changed, 322 insertions, 212 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 6b33f48885..87651716fe 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -74,6 +74,21 @@ static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) +int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; + int extra = 1; + cnt = vpu_count(atom->cmd); + + if (r300->radeon.radeonScreen->kernel_mm) { + extra = 5; + } + + return cnt ? (cnt * 4) + extra : 0; +} + + void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -81,39 +96,18 @@ void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) drm_r300_cmd_header_t cmd; uint32_t addr, ndw; - if (!r300->radeon.radeonScreen->kernel_mm) { - uint32_t dwords; - dwords = (*atom->check) (ctx, atom); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(atom->cmd, dwords); - END_BATCH(); - return; - } - cmd.u = atom->cmd[0]; addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; - ndw = cmd.vpu.count * 4; - if (ndw) { + ndw = atom->check(ctx, atom); - if (r300->vap_flush_needed) { - BEGIN_BATCH_NO_AUTOSTATE(15 + ndw); + BEGIN_BATCH_NO_AUTOSTATE(ndw); - /* flush processing vertices */ - OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0); - OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); - OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff); - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - r300->vap_flush_needed = GL_FALSE; - } else { - BEGIN_BATCH_NO_AUTOSTATE(5 + ndw); - } - OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); - OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); - OUT_BATCH_TABLE(&atom->cmd[1], ndw); - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - END_BATCH(); - } + ndw -= 5; + OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); + OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(&atom->cmd[1], ndw); + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + END_BATCH(); } void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) @@ -122,16 +116,9 @@ void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) BATCH_LOCALS(&r300->radeon); drm_r300_cmd_header_t cmd; uint32_t addr, ndw, sz; - int type, clamp, stride; + int type, clamp; - if (!r300->radeon.radeonScreen->kernel_mm) { - uint32_t dwords; - dwords = (*atom->check) (ctx, atom); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(atom->cmd, dwords); - END_BATCH(); - return; - } + ndw = atom->check(ctx, atom); cmd.u = atom->cmd[0]; sz = cmd.r500fp.count; @@ -142,18 +129,34 @@ void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) addr |= (type << 16); addr |= (clamp << 17); - stride = type ? 4 : 6; - - ndw = sz * stride; - if (ndw) { + BEGIN_BATCH_NO_AUTOSTATE(ndw); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); + OUT_BATCH(addr); + ndw-=3; + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(&atom->cmd[1], ndw); + END_BATCH(); +} - BEGIN_BATCH_NO_AUTOSTATE(3 + ndw); - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); - OUT_BATCH(addr); - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); - OUT_BATCH_TABLE(&atom->cmd[1], ndw); - END_BATCH(); +static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); + int dw = 0, i; + if (atom->cmd[0] == CP_PACKET2) { + return dw; + } + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (!t && !r300->radeon.radeonScreen->kernel_mm) { + dw += 0; + } else if (t && t->image_override && !t->bo) { + if (!r300->radeon.radeonScreen->kernel_mm) + dw += 2; + } else + dw += 4; } + return dw; } static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) @@ -243,6 +246,17 @@ void r300_emit_scissor(GLcontext *ctx) OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT)); END_BATCH(); } +static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t dw = 6 + 3 + 16; + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + dw -= 3 + 16; + } + return dw; +} static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) { @@ -252,7 +266,7 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) uint32_t cbpitch; uint32_t offset = r300->radeon.state.color.draw_offset; uint32_t dw = 6; - int i; + int i; rrb = radeon_get_colorbuffer(&r300->radeon); if (!rrb || !rrb->bo) { @@ -334,13 +348,23 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) } } +static int check_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t dw; + dw = 6; + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + return dw; +} + static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); struct radeon_renderbuffer *rrb; uint32_t zbpitch; - uint32_t dw; + uint32_t dw = atom->check(ctx, atom); rrb = radeon_get_depthbuffer(&r300->radeon); if (!rrb) @@ -356,9 +380,6 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) } } - dw = 6; - if (r300->radeon.radeonScreen->kernel_mm) - dw += 2; BEGIN_BATCH_NO_AUTOSTATE(dw); OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -370,46 +391,6 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) END_BATCH(); } -static void emit_gb_misc(GLcontext *ctx, struct radeon_state_atom * atom) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH(atom->cmd[0]); - OUT_BATCH(atom->cmd[1]); - OUT_BATCH(atom->cmd[2]); - OUT_BATCH(atom->cmd[3]); - END_BATCH(); - } -} - -static void emit_threshold_misc(GLcontext *ctx, struct radeon_state_atom * atom) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - BEGIN_BATCH_NO_AUTOSTATE(3); - OUT_BATCH(atom->cmd[0]); - OUT_BATCH(atom->cmd[1]); - OUT_BATCH(atom->cmd[2]); - END_BATCH(); - } -} - -static void emit_shade_misc(GLcontext *ctx, struct radeon_state_atom * atom) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - - if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH(atom->cmd[0]); - OUT_BATCH(atom->cmd[1]); - END_BATCH(); - } -} - static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -427,7 +408,7 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; } - BEGIN_BATCH_NO_AUTOSTATE(5); + BEGIN_BATCH_NO_AUTOSTATE(atom->cmd_size); OUT_BATCH(atom->cmd[0]); atom->cmd[1] &= ~0xf; atom->cmd[1] |= format; @@ -438,6 +419,11 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom END_BATCH(); } +static int check_never(GLcontext *ctx, struct radeon_state_atom *atom) +{ + return 0; +} + static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) { return atom->cmd_size; @@ -454,28 +440,29 @@ static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom) return cnt ? cnt + 1 : 0; } -int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) -{ - int cnt; - - cnt = vpu_count(atom->cmd); - return cnt ? (cnt * 4) + 1 : 0; -} - int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; - + r300ContextPtr r300 = R300_CONTEXT(ctx); + int extra = 1; cnt = r500fp_count(atom->cmd); - return cnt ? (cnt * 6) + 1 : 0; + if (r300->radeon.radeonScreen->kernel_mm) + extra = 3; + + return cnt ? (cnt * 6) + extra : 0; } int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; + r300ContextPtr r300 = R300_CONTEXT(ctx); + int extra = 1; + cnt = r500fp_count(atom->cmd); + if (r300->radeon.radeonScreen->kernel_mm) + extra = 3; cnt = r500fp_count(atom->cmd); - return cnt ? (cnt * 4) + 1 : 0; + return cnt ? (cnt * 4) + extra : 0; } #define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ @@ -565,11 +552,14 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(gb_enable, always, 2, 0); r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1); - ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); + } else { + ALLOC_STATE(gb_misc, never, R300_GB_MISC_CMDSIZE, 0); + } r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3); - r300->hw.gb_misc.emit = emit_gb_misc; ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0); - r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2); + r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2); ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1); ALLOC_STATE(ga_point_s0, always, 5, 0); @@ -584,9 +574,12 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1); ALLOC_STATE(ga_line_stipple, always, 4, 0); r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3); - ALLOC_STATE(shade, always, 2, 0); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + ALLOC_STATE(shade, always, 2, 0); + } else { + ALLOC_STATE(shade, never, 2, 0); + } r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1); - r300->hw.shade.emit = emit_shade_misc; ALLOC_STATE(shade2, always, 4, 0); r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3); ALLOC_STATE(polygon_mode, always, 4, 0); @@ -635,11 +628,14 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0); - r300->hw.r500fp.emit = emit_r500fp; + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.r500fp.emit = emit_r500fp; + ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0); - r300->hw.r500fp_const.emit = emit_r500fp; + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.r500fp_const.emit = emit_r500fp; } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3); @@ -682,15 +678,18 @@ void r300InitCmdBuf(r300ContextPtr r300) } ALLOC_STATE(rop, always, 2, 0); r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1); - ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); + ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0); r300->hw.cb.emit = &emit_cb_offset; ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1); - ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); - r300->hw.rb3d_discard_src_pixel_lte_threshold.emit = emit_threshold_misc; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); + } else { + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0); + } + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3); @@ -700,7 +699,7 @@ void r300InitCmdBuf(r300ContextPtr r300) cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4); r300->hw.zstencil_format.emit = emit_zstencil_format; - ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); + ALLOC_STATE(zb, zb_offset, R300_ZB_CMDSIZE, 0); r300->hw.zb.emit = emit_zb_offset; ALLOC_STATE(zb_depthclearvalue, always, 2, 0); r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); @@ -713,47 +712,72 @@ void r300InitCmdBuf(r300ContextPtr r300) /* VPU only on TCL */ if (has_tcl) { - int i; + int i; + if (r300->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE(vap_flush, always, 10, 0); + /* flush processing vertices */ + r300->hw.vap_flush.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); + r300->hw.vap_flush.cmd[1] = 0; + r300->hw.vap_flush.cmd[2] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DSTCACHE_CTLSTAT, 1); + r300->hw.vap_flush.cmd[3] = R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D; + r300->hw.vap_flush.cmd[4] = cmdpacket0(r300->radeon.radeonScreen, RADEON_WAIT_UNTIL, 1); + r300->hw.vap_flush.cmd[5] = RADEON_WAIT_3D_IDLECLEAN; + r300->hw.vap_flush.cmd[6] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); + r300->hw.vap_flush.cmd[7] = 0xffffff; + r300->hw.vap_flush.cmd[8] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1); + r300->hw.vap_flush.cmd[9] = 0; + } else { + ALLOC_STATE(vap_flush, never, 10, 0); + } + + ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); r300->hw.vpi.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); - r300->hw.vpi.emit = emit_vpu; + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpi.emit = emit_vpu; if (is_r500) { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); - r300->hw.vpp.emit = emit_vpu; - - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); - r300->hw.vps.emit = emit_vpu; + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vps.emit = emit_vpu; for (i = 0; i < 6; i++) { - ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[0] = - cmdvpu(r300->radeon.radeonScreen, - R500_PVS_UCP_START + i, 1); - r300->hw.vpucp[i].emit = emit_vpu; + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[0] = + cmdvpu(r300->radeon.radeonScreen, + R500_PVS_UCP_START + i, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpucp[i].emit = emit_vpu; } } else { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); - r300->hw.vpp.emit = emit_vpu; - - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); - r300->hw.vps.emit = emit_vpu; + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vps.emit = emit_vpu; for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); r300->hw.vpucp[i].cmd[0] = cmdvpu(r300->radeon.radeonScreen, - R300_PVS_UCP_START + i, 1); - r300->hw.vpucp[i].emit = emit_vpu; + R300_PVS_UCP_START + i, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpucp[i].emit = emit_vpu; } } } @@ -777,7 +801,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0); - ALLOC_STATE(tex.offset, variable, 1, 0); + ALLOC_STATE(tex.offset, tex_offsets, 1, 0); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0); r300->hw.tex.offset.emit = &emit_tex_offsets; diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h index 53bcc0eeb4..1b703e518a 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -38,6 +38,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" +#define CACHE_FLUSH_BUFSZ (4*2) +#define PRE_EMIT_STATE_BUFSZ (2+2) +#define AOS_BUFSZ(nr) (3+(nr >>1)*3 + (nr&1)*2 + (nr*2)) +#define FIREAOS_BUFSZ (3) +#define SCISSORS_BUFSZ (3) + extern void r300InitCmdBuf(r300ContextPtr r300); void r300_emit_scissor(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index ca8021df16..971a202638 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -212,11 +212,8 @@ static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmes static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon) { - r300ContextPtr r300 = (r300ContextPtr)radeon; BATCH_LOCALS(radeon); - r300->vap_flush_needed = GL_TRUE; - cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); BEGIN_BATCH_NO_AUTOSTATE(2); OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 339b304558..3202c96c2e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -350,6 +350,7 @@ struct r300_hw_state { struct radeon_state_atom zb_hiz_offset; /* (4F44) */ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ + struct radeon_state_atom vap_flush; struct radeon_state_atom vpi; /* vp instructions */ struct radeon_state_atom vpp; /* vp parameters */ struct radeon_state_atom vps; /* vertex point size (?) */ @@ -524,7 +525,6 @@ struct r300_context { struct r300_swtcl_info swtcl; struct r300_vertex_buffer vbuf; struct r300_index_buffer ind_buf; - GLboolean vap_flush_needed; uint32_t fallback; diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index e98dc33518..2ac898bd59 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -39,6 +39,7 @@ #include "r300_render.h" #include "r300_state.h" #include "r300_tex.h" +#include "r300_cmdbuf.h" #include "radeon_buffer_objects.h" @@ -328,7 +329,6 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st type = GL_FLOAT; - r300ConvertAttrib(ctx, count, input, &r300_attr); if (input->StrideB == 0) { r300_attr.stride = 0; } else { @@ -339,41 +339,14 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st } else { type = input->Type; r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; - if (input->BufferObj->Name) { - if (stride % 4 != 0) { - assert(((int) input->Ptr) % input->StrideB == 0); - r300AlignDataToDword(ctx, input, count, &r300_attr); - r300_attr.is_named_bo = GL_FALSE; - } else { - r300_attr.stride = input->StrideB; - r300_attr.bo_offset = (GLuint) input->Ptr; - r300_attr.bo = get_radeon_buffer_object(input->BufferObj)->bo; - r300_attr.is_named_bo = GL_TRUE; - } - } else { - int size; - uint32_t *dst; + if (!input->BufferObj->Name) { if (input->StrideB == 0) { - size = getTypeSize(input->Type) * input->Size; - count = 1; r300_attr.stride = 0; } else { - size = getTypeSize(input->Type) * input->Size * count; r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3; } - radeonAllocDmaRegion(&r300->radeon, &r300_attr.bo, &r300_attr.bo_offset, size, 32); - assert(r300_attr.bo->ptr != NULL); - dst = (uint32_t *)ADD_POINTERS(r300_attr.bo->ptr, r300_attr.bo_offset); - switch (r300_attr.dwords) { - case 1: radeonEmitVec4(dst, input->Ptr, input->StrideB, count); break; - case 2: radeonEmitVec8(dst, input->Ptr, input->StrideB, count); break; - case 3: radeonEmitVec12(dst, input->Ptr, input->StrideB, count); break; - case 4: radeonEmitVec16(dst, input->Ptr, input->StrideB, count); break; - default: assert(0); break; - } - r300_attr.is_named_bo = GL_FALSE; } } @@ -468,7 +441,6 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_buffer *vbuf = &r300->vbuf; - int ret; { int i, tmp; @@ -492,29 +464,83 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); if (r300->fallback) return; +} - { - int i; +static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLuint stride; + int ret; + int i, index; - for (i = 0; i < vbuf->num_attribs; i++) { - struct radeon_aos *aos = &r300->radeon.tcl.aos[i]; + for (index = 0; index < vbuf->num_attribs; index++) { + struct radeon_aos *aos = &r300->radeon.tcl.aos[index]; + i = vbuf->attribs[index].element; - aos->count = vbuf->attribs[i].stride == 0 ? 1 : count; - aos->stride = vbuf->attribs[i].stride / sizeof(float); - aos->offset = vbuf->attribs[i].bo_offset; - aos->components = vbuf->attribs[i].dwords; - aos->bo = vbuf->attribs[i].bo; + stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) { + + r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]); + } else { + if (input[i]->BufferObj->Name) { + if (stride % 4 != 0) { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]); + vbuf->attribs[index].is_named_bo = GL_FALSE; + } else { + vbuf->attribs[index].stride = input[i]->StrideB; + vbuf->attribs[index].bo_offset = (intptr_t) input[i]->Ptr; + vbuf->attribs[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + vbuf->attribs[index].is_named_bo = GL_TRUE; + } + } else { + + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } else { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&r300->radeon, &vbuf->attribs[index].bo, &vbuf->attribs[index].bo_offset, size, 32); + assert(vbuf->attribs[index].bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(vbuf->attribs[index].bo->ptr, vbuf->attribs[index].bo_offset); + switch (vbuf->attribs[index].dwords) { + case 1: radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + default: assert(0); break; + } - if (vbuf->attribs[i].is_named_bo) { - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[i].bo, RADEON_GEM_DOMAIN_GTT, 0); } } - r300->radeon.tcl.aos_count = vbuf->num_attribs; - ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); - if (ret) - r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, GL_TRUE); + aos->count = vbuf->attribs[index].stride == 0 ? 1 : count; + aos->stride = vbuf->attribs[index].stride / sizeof(float); + aos->components = vbuf->attribs[index].dwords; + aos->bo = vbuf->attribs[index].bo; + aos->offset = vbuf->attribs[index].bo_offset; + + if (vbuf->attribs[index].is_named_bo) { + radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[index].bo, RADEON_GEM_DOMAIN_GTT, 0); + } } + + r300->radeon.tcl.aos_count = vbuf->num_attribs; + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); + r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, ret); + } static void r300FreeData(GLcontext *ctx) @@ -542,6 +568,34 @@ static void r300FreeData(GLcontext *ctx) } } +static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx, GLuint nr_prims) +{ + struct r300_context *r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLboolean flushed; + GLuint dwords; + GLuint state_size; + + dwords = 2*CACHE_FLUSH_BUFSZ; + dwords += PRE_EMIT_STATE_BUFSZ; + dwords += (AOS_BUFSZ(vbuf->num_attribs) + + SCISSORS_BUFSZ + + FIREAOS_BUFSZ )*nr_prims; + + state_size = radeonCountStateEmitSize(&r300->radeon); + flushed = rcommonEnsureCmdBufSpace(&r300->radeon, + dwords + state_size, + __FUNCTION__); + if (flushed) + dwords += radeonCountStateEmitSize(&r300->radeon); + else + dwords += state_size; + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + return dwords; +} + static GLboolean r300TryDrawPrims(GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, @@ -553,6 +607,10 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, struct r300_context *r300 = R300_CONTEXT(ctx); GLuint i; + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s: %u (%d-%d) cs begin at %d\n", + __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw ); + if (ctx->NewState) _mesa_update_state( ctx ); @@ -563,14 +621,6 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); - /* ensure we have the cmd buf space in advance to cover - * the state + DMA AOS pointers */ - rcommonEnsureCmdBufSpace(&r300->radeon, - r300->radeon.hw.max_state_size + (60*sizeof(int)), - __FUNCTION__); - - r300SetupIndexBuffer(ctx, ib); - r300SetVertexFormat(ctx, arrays, max_index + 1); if (r300->fallback) @@ -580,6 +630,17 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300UpdateShaderStates(r300); + /* ensure we have the cmd buf space in advance to cover + * the state + DMA AOS pointers */ + r300PredictTryDrawPrimsSize(ctx, nr_prims); + + r300SetupIndexBuffer(ctx, ib); + + r300AllocDmaRegions(ctx, arrays, max_index + 1); + + if (r300->fallback) + return GL_FALSE; + r300EmitCacheFlush(r300); radeonEmitState(&r300->radeon); @@ -591,6 +652,10 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300FreeData(ctx); + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s: %u (%d-%d) cs ending at %d\n", + __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw ); + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 7ab6928247..3303078e39 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -507,7 +507,15 @@ static void r300EmitClearState(GLcontext * ctx) R500_ALU_RGBA_A_SWIZ_0; r500fp.cmd[7] = 0; - emit_r500fp(ctx, &r500fp); + if (r300->radeon.radeonScreen->kernel_mm) { + emit_r500fp(ctx, &r500fp); + } else { + int dwords = r500fp.check(ctx,&r500fp); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(r500fp.cmd, dwords); + END_BATCH(); + } + } BEGIN_BATCH(2); @@ -551,6 +559,7 @@ static void r300EmitClearState(GLcontext * ctx) struct radeon_state_atom vpu; uint32_t _cmd[10]; R300_STATECHANGE(r300, pvs); + R300_STATECHANGE(r300, vap_flush); R300_STATECHANGE(r300, vpi); BEGIN_BATCH(4); @@ -592,8 +601,19 @@ static void r300EmitClearState(GLcontext * ctx) PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[8] = 0x0; - r300->vap_flush_needed = GL_TRUE; - emit_vpu(ctx, &vpu); + if (r300->radeon.radeonScreen->kernel_mm) { + int dwords = r300->hw.vap_flush.check(ctx,&r300->hw.vap_flush); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(r300->hw.vap_flush.cmd, dwords); + END_BATCH(); + emit_vpu(ctx, &vpu); + } else { + int dwords = vpu.check(ctx,&vpu); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(vpu.cmd, dwords); + END_BATCH(); + } + } } diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 37a40f6c36..446cf40131 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -341,12 +341,6 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) if (type < 0 || num_verts <= 0) return; - /* Make space for at least 128 dwords. - * This is supposed to ensure that we can get all rendering - * commands into a single command buffer. - */ - rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__); - if (rmesa->ind_buf.bo) { GLuint first, incr, offset = 0; diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index ce0666b901..4fe9175b61 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -371,6 +371,7 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) p = (GLint) plane - (GLint) GL_CLIP_PLANE0; ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + R300_STATECHANGE( rmesa, vap_flush ); R300_STATECHANGE( rmesa, vpucp[p] ); rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index c5edbd0052..862f212085 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -298,6 +298,8 @@ static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_ver assert((code->length > 0) && (code->length % 4 == 0)); + R300_STATECHANGE( r300, vap_flush ); + switch ((dest >> 8) & 0xf) { case 0: R300_STATECHANGE(r300, vpi); @@ -335,6 +337,7 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + R300_STATECHANGE(rmesa, vap_flush); R300_STATECHANGE(rmesa, vpp); param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); |