Diffstat (limited to 'src/mesa/drivers/dri/r600/r600_render.c')
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_render.c | 539
1 file changed, 539 insertions(+), 0 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r600_render.c b/src/mesa/drivers/dri/r600/r600_render.c
new file mode 100644
index 0000000000..7b4ecccc66
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_render.c
@@ -0,0 +1,539 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \brief R600 Render (Vertex Buffer Implementation)
+ *
+ * The immediate implementation has been removed from CVS in favor of the
+ * vertex buffer implementation.
+ *
+ * The render functions are called by the pipeline manager to render a batch
+ * of primitives. They return TRUE to pass on to the next stage (i.e. software
+ * rasterization) or FALSE to indicate that the pipeline has finished after
+ * rendering something.
+ *
+ * When falling back to software TCL, we still attempt to use hardware
+ * rasterization.
+ *
+ * I am not sure that the cache-related registers are set up correctly, but
+ * obviously this does work... Further investigation is needed.
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ *
+ * \todo Add immediate implementation back? Perhaps this is useful if there
+ * are no bugs...
+ */
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+#include "main/api_arrayelt.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
+#include "radeon_reg.h"
+#include "radeon_macros.h"
+#include "r600_context.h"
+#include "r600_ioctl.h"
+#include "r600_state.h"
+#include "r600_reg.h"
+#include "r600_tex.h"
+#include "r600_emit.h"
+#include "r600_fragprog.h"
+
+extern int future_hw_tcl_on;
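+
+/* hw_tcl_on tracks whether hardware TCL is used for the current draw;
+ * future_hw_tcl_on (presumably, as in the r300 driver this code appears
+ * to be derived from) holds the value it takes on at the start of the
+ * next TCL pipeline run; see r600RunTCLRender() below. */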
+
+/**
+ * \brief Convert an OpenGL primitive type into an R600 primitive type.
+ */
+int r600PrimitiveType(r600ContextPtr rmesa, int prim)
+{
+    switch (prim & PRIM_MODE_MASK) {
+    case GL_POINTS:
+        return R600_VAP_VF_CNTL__PRIM_POINTS;
+    case GL_LINES:
+        return R600_VAP_VF_CNTL__PRIM_LINES;
+    case GL_LINE_STRIP:
+        return R600_VAP_VF_CNTL__PRIM_LINE_STRIP;
+    case GL_LINE_LOOP:
+        return R600_VAP_VF_CNTL__PRIM_LINE_LOOP;
+    case GL_TRIANGLES:
+        return R600_VAP_VF_CNTL__PRIM_TRIANGLES;
+    case GL_TRIANGLE_STRIP:
+        return R600_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
+    case GL_TRIANGLE_FAN:
+        return R600_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
+    case GL_QUADS:
+        return R600_VAP_VF_CNTL__PRIM_QUADS;
+    case GL_QUAD_STRIP:
+        return R600_VAP_VF_CNTL__PRIM_QUAD_STRIP;
+    case GL_POLYGON:
+        return R600_VAP_VF_CNTL__PRIM_POLYGON;
+    default:
+        assert(0);
+        return -1;
+    }
+}
+
+/**
+ * \brief Trim a vertex count down to the largest count the primitive
+ * type can legally consume.
+ */
+int r600NumVerts(r600ContextPtr rmesa, int num_verts, int prim)
+{
+    int verts_off = 0;
+
+    switch (prim & PRIM_MODE_MASK) {
+    case GL_POINTS:
+        verts_off = 0;
+        break;
+    case GL_LINES:
+        verts_off = num_verts % 2;
+        break;
+    case GL_LINE_STRIP:
+        if (num_verts < 2)
+            verts_off = num_verts;
+        break;
+    case GL_LINE_LOOP:
+        if (num_verts < 2)
+            verts_off = num_verts;
+        break;
+    case GL_TRIANGLES:
+        verts_off = num_verts % 3;
+        break;
+    case GL_TRIANGLE_STRIP:
+        if (num_verts < 3)
+            verts_off = num_verts;
+        break;
+    case GL_TRIANGLE_FAN:
+        if (num_verts < 3)
+            verts_off = num_verts;
+        break;
+    case GL_QUADS:
+        verts_off = num_verts % 4;
+        break;
+    case GL_QUAD_STRIP:
+        if (num_verts < 4)
+            verts_off = num_verts;
+        else
+            verts_off = num_verts % 2;
+        break;
+    case GL_POLYGON:
+        if (num_verts < 3)
+            verts_off = num_verts;
+        break;
+    default:
+        assert(0);
+        return -1;
+    }
+
+    return num_verts - verts_off;
+}
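+
+/* Illustrative examples of the trimming above: GL_TRIANGLES with 8
+ * vertices has 8 % 3 = 2 stray vertices and is trimmed to 6; a
+ * GL_QUAD_STRIP with 7 vertices keeps 6, since a strip consumes two
+ * vertices per additional quad; a GL_TRIANGLE_STRIP with only 2
+ * vertices is trimmed to 0 and draws nothing. */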
+
+static void r600EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
+{
+    r600ContextPtr rmesa = R600_CONTEXT(ctx);
+    void *out;
+
+    radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
+                         &rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4);
+    radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
+    out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
+    memcpy(out, elts, n_elts * 4);
+    radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
+}
+
+static void r600FireEB(r600ContextPtr rmesa, int vertex_count, int type)
+{
+    BATCH_LOCALS(&rmesa->radeon);
+
+    if (vertex_count > 0) {
+        BEGIN_BATCH(10);
+        OUT_BATCH_PACKET3(R600_PACKET3_3D_DRAW_INDX_2, 0);
+        OUT_BATCH(R600_VAP_VF_CNTL__PRIM_WALK_INDICES |
+                  (vertex_count << 16) |
+                  type |
+                  R600_VAP_VF_CNTL__INDEX_SIZE_32bit);
+
+        if (!rmesa->radeon.radeonScreen->kernel_mm) {
+            OUT_BATCH_PACKET3(R600_PACKET3_INDX_BUFFER, 2);
+            OUT_BATCH(R600_INDX_BUFFER_ONE_REG_WR |
+                      (0 << R600_INDX_BUFFER_SKIP_SHIFT) |
+                      (R600_VAP_PORT_IDX0 >> 2));
+            OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
+                            rmesa->radeon.tcl.elt_dma_bo,
+                            rmesa->radeon.tcl.elt_dma_offset,
+                            RADEON_GEM_DOMAIN_GTT, 0, 0);
+            OUT_BATCH(vertex_count);
+        } else {
+            OUT_BATCH_PACKET3(R600_PACKET3_INDX_BUFFER, 2);
+            OUT_BATCH(R600_INDX_BUFFER_ONE_REG_WR |
+                      (0 << R600_INDX_BUFFER_SKIP_SHIFT) |
+                      (R600_VAP_PORT_IDX0 >> 2));
+            OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
+            OUT_BATCH(vertex_count);
+            radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+                                  rmesa->radeon.tcl.elt_dma_bo,
+                                  RADEON_GEM_DOMAIN_GTT, 0, 0);
+        }
+        END_BATCH();
+    }
+}
+
+static void r600EmitAOS(r600ContextPtr rmesa, GLuint nr, GLuint offset)
+{
+    BATCH_LOCALS(&rmesa->radeon);
+    uint32_t voffset;
+    int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+    int i;
+
+    if (RADEON_DEBUG & DEBUG_VERTS)
+        fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
+                offset);
+
+    if (!rmesa->radeon.radeonScreen->kernel_mm) {
+        BEGIN_BATCH(sz + 2 + (nr * 2));
+        OUT_BATCH_PACKET3(R600_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+        OUT_BATCH(nr);
+
+        for (i = 0; i + 1 < nr; i += 2) {
+            OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+                      (rmesa->radeon.tcl.aos[i].stride << 8) |
+                      (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+                      (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+            voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+                      offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+            OUT_BATCH_RELOC(voffset,
+                            rmesa->radeon.tcl.aos[i].bo,
+                            voffset,
+                            RADEON_GEM_DOMAIN_GTT,
+                            0, 0);
+            voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+                      offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+            OUT_BATCH_RELOC(voffset,
+                            rmesa->radeon.tcl.aos[i + 1].bo,
+                            voffset,
+                            RADEON_GEM_DOMAIN_GTT,
+                            0, 0);
+        }
+
+        if (nr & 1) {
+            OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+                      (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+            voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+                      offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+            OUT_BATCH_RELOC(voffset,
+                            rmesa->radeon.tcl.aos[nr - 1].bo,
+                            voffset,
+                            RADEON_GEM_DOMAIN_GTT,
+                            0, 0);
+        }
+        END_BATCH();
+    } else {
+        BEGIN_BATCH(sz + 2 + (nr * 2));
+        OUT_BATCH_PACKET3(R600_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+        OUT_BATCH(nr);
+
+        for (i = 0; i + 1 < nr; i += 2) {
+            OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+                      (rmesa->radeon.tcl.aos[i].stride << 8) |
+                      (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+                      (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+            voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+                      offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+            OUT_BATCH(voffset);
+            voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+                      offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+            OUT_BATCH(voffset);
+        }
+
+        if (nr & 1) {
+            OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+                      (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+            voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+                      offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+            OUT_BATCH(voffset);
+        }
+
+        for (i = 0; i + 1 < nr; i += 2) {
+            voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+                      offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+            radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+                                  rmesa->radeon.tcl.aos[i + 0].bo,
+                                  RADEON_GEM_DOMAIN_GTT,
+                                  0, 0);
+            voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+                      offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+            radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+                                  rmesa->radeon.tcl.aos[i + 1].bo,
+                                  RADEON_GEM_DOMAIN_GTT,
+                                  0, 0);
+        }
+        if (nr & 1) {
+            voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+                      offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+            radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+                                  rmesa->radeon.tcl.aos[nr - 1].bo,
+                                  RADEON_GEM_DOMAIN_GTT,
+                                  0, 0);
+        }
+        END_BATCH();
+    }
+}
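+
+/* Payload accounting for the LOAD_VBPNTR packets above: one dword for the
+ * array count, three dwords per pair of arrays (a shared control dword
+ * packing components/stride for both, plus two pointers) and two dwords
+ * for a trailing odd array, hence sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+ * e.g. nr = 3 gives 1 + 3 + 2 = 6 payload dwords. */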
+
+static void r600FireAOS(r600ContextPtr rmesa, int vertex_count, int type)
+{
+    BATCH_LOCALS(&rmesa->radeon);
+
+    BEGIN_BATCH(3);
+    OUT_BATCH_PACKET3(R600_PACKET3_3D_DRAW_VBUF_2, 0);
+    OUT_BATCH(R600_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST |
+              (vertex_count << 16) | type);
+    END_BATCH();
+}
+
+static void r600RunRenderPrimitive(r600ContextPtr rmesa, GLcontext * ctx,
+                                   int start, int end, int prim)
+{
+    int type, num_verts;
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct vertex_buffer *vb = &tnl->vb;
+
+    type = r600PrimitiveType(rmesa, prim);
+    num_verts = r600NumVerts(rmesa, end - start, prim);
+
+    if (type < 0 || num_verts <= 0)
+        return;
+
+    /* Make space for at least 64 dwords.
+     * This is supposed to ensure that we can get all rendering
+     * commands into a single command buffer.
+     */
+    rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);
+
+    if (vb->Elts) {
+        if (num_verts > 65535) {
+            /* not implemented yet */
+            WARN_ONCE("Too many elts\n");
+            return;
+        }
+        /* Note: The following is incorrect, but it's the best I can do
+         * without a major refactoring of how DMA memory is handled.
+         * The problem: Ensuring that both vertex arrays *and* index
+         * arrays are at the right position, and then ensuring that
+         * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
+         * at once.
+         *
+         * So why is the following incorrect? Well, it seems like
+         * allocating the index array might actually evict the vertex
+         * arrays. *sigh*
+         */
+        r600EmitElts(ctx, vb->Elts, num_verts);
+        r600EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
+        r600FireEB(rmesa, num_verts, type);
+    } else {
+        r600EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
+        r600FireAOS(rmesa, num_verts, type);
+    }
+    COMMIT_BATCH();
+}
+
+static GLboolean r600RunRender(GLcontext * ctx,
+                               struct tnl_pipeline_stage *stage)
+{
+    r600ContextPtr rmesa = R600_CONTEXT(ctx);
+    int i;
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct vertex_buffer *vb = &tnl->vb;
+
+    if (RADEON_DEBUG & DEBUG_PRIMS)
+        fprintf(stderr, "%s\n", __FUNCTION__);
+
+    r600UpdateShaders(rmesa);
+    if (r600EmitArrays(ctx))
+        return GL_TRUE;
+
+    r600UpdateShaderStates(rmesa);
+
+    r600EmitCacheFlush(rmesa);
+    radeonEmitState(&rmesa->radeon);
+
+    for (i = 0; i < vb->PrimitiveCount; i++) {
+        GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
+        GLuint start = vb->Primitive[i].start;
+        GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
+        r600RunRenderPrimitive(rmesa, ctx, start, end, prim);
+    }
+
+    r600EmitCacheFlush(rmesa);
+
+    radeonReleaseArrays(ctx, ~0);
+
+    return GL_FALSE;
+}
+
+#define FALLBACK_IF(expr)                                       \
+    do {                                                        \
+        if (expr) {                                             \
+            if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)            \
+                WARN_ONCE("Software fallback:%s\n",             \
+                          #expr);                               \
+            return R600_FALLBACK_RAST;                          \
+        }                                                       \
+    } while (0)
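+
+/* Usage sketch: FALLBACK_IF() returns R600_FALLBACK_RAST from the
+ * enclosing function as soon as its condition holds, e.g.
+ * FALLBACK_IF(ctx->Line.StippleFlag) punts stippled lines to software
+ * rasterization. The "1 ||" makes the warning unconditional for now
+ * instead of gating it on RADEON_DEBUG & DEBUG_FALLBACKS. */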
+
+static int r600Fallback(GLcontext * ctx)
+{
+    r600ContextPtr r600 = R600_CONTEXT(ctx);
+    const unsigned back = ctx->Stencil._BackFace;
+
+    FALLBACK_IF(r600->radeon.Fallback);
+
+    /* Do we need to use new-style shaders?
+     * Also, is there a better way to do this? */
+    {
+        struct r600_fragment_program *fp =
+            (struct r600_fragment_program *) ctx->FragmentProgram._Current;
+        if (fp) {
+            if (!fp->translated) {
+                r600TranslateFragmentShader(r600, fp);
+                FALLBACK_IF(!fp->translated);
+            }
+        }
+    }
+
+    FALLBACK_IF(ctx->RenderMode != GL_RENDER);
+
+    /* If GL_EXT_stencil_two_side is disabled, this fallback check can
+     * be removed.
+     */
+    FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
+                || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
+                || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back]);
+
+    if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
+        FALLBACK_IF(ctx->Point.PointSprite);
+
+    if (!r600->disable_lowimpact_fallback) {
+        FALLBACK_IF(ctx->Polygon.StippleFlag);
+        FALLBACK_IF(ctx->Multisample._Enabled);
+        FALLBACK_IF(ctx->Line.StippleFlag);
+        FALLBACK_IF(ctx->Line.SmoothFlag);
+        FALLBACK_IF(ctx->Point.SmoothFlag);
+    }
+
+    return R600_FALLBACK_NONE;
+}
+
+static GLboolean r600RunNonTCLRender(GLcontext * ctx,
+                                     struct tnl_pipeline_stage *stage)
+{
+    r600ContextPtr rmesa = R600_CONTEXT(ctx);
+
+    if (RADEON_DEBUG & DEBUG_PRIMS)
+        fprintf(stderr, "%s\n", __FUNCTION__);
+
+    if (r600Fallback(ctx) >= R600_FALLBACK_RAST)
+        return GL_TRUE;
+
+    if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+        return GL_TRUE;
+
+    if (!r600ValidateBuffers(ctx))
+        return GL_TRUE;
+
+    return r600RunRender(ctx, stage);
+}
+
+static GLboolean r600RunTCLRender(GLcontext * ctx,
+                                  struct tnl_pipeline_stage *stage)
+{
+    r600ContextPtr rmesa = R600_CONTEXT(ctx);
+    struct r600_vertex_program *vp;
+
+    hw_tcl_on = future_hw_tcl_on;
+
+    if (RADEON_DEBUG & DEBUG_PRIMS)
+        fprintf(stderr, "%s\n", __FUNCTION__);
+
+    if (hw_tcl_on == GL_FALSE)
+        return GL_TRUE;
+
+    if (r600Fallback(ctx) >= R600_FALLBACK_TCL) {
+        hw_tcl_on = GL_FALSE;
+        return GL_TRUE;
+    }
+
+    if (!r600ValidateBuffers(ctx))
+        return GL_TRUE;
+
+    r600UpdateShaders(rmesa);
+
+    vp = (struct r600_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
+    if (vp->native == GL_FALSE) {
+        hw_tcl_on = GL_FALSE;
+        return GL_TRUE;
+    }
+
+    return r600RunRender(ctx, stage);
+}
+
+const struct tnl_pipeline_stage _r600_render_stage = {
+    "r600 Hardware Rasterization",
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    r600RunNonTCLRender
+};
+
+const struct tnl_pipeline_stage _r600_tcl_stage = {
+    "r600 Hardware Transform, Clipping and Lighting",
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    r600RunTCLRender
+};