Diffstat (limited to 'src/mesa/drivers/dri/r600/r700_render.c')
-rw-r--r--  src/mesa/drivers/dri/r600/r700_render.c  231
1 file changed, 153 insertions, 78 deletions
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 3e1ce9fb72..eab27cbd84 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -59,9 +59,7 @@
 void r700WaitForIdle(context_t *context);
 void r700WaitForIdleClean(context_t *context);
-GLboolean r700SendTextureState(context_t *context);
 static unsigned int r700PrimitiveType(int prim);
-void r600UpdateTextureState(GLcontext * ctx);
 GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo,
                        uint32_t read_domain,
@@ -253,24 +251,12 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
 {
     context_t *context = R700_CONTEXT(ctx);
     BATCH_LOCALS(&context->radeon);
-    int type, i, total_emit;
+    int type, total_emit;
     int num_indices;
     uint32_t vgt_draw_initiator = 0;
     uint32_t vgt_index_type     = 0;
     uint32_t vgt_primitive_type = 0;
     uint32_t vgt_num_indices    = 0;
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *vb = &tnl->vb;
-    GLboolean bUseDrawIndex;
-
-    if(NULL != context->ind_buf.bo)
-    {
-        bUseDrawIndex = GL_TRUE;
-    }
-    else
-    {
-        bUseDrawIndex = GL_FALSE;
-    }
 
     type = r700PrimitiveType(prim);
     num_indices = r700NumVerts(end - start, prim);
@@ -282,90 +268,153 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
     if (type < 0 || num_indices <= 0)
         return;
 
-    if(GL_TRUE == bUseDrawIndex)
-    {
-        total_emit =   3 /* VGT_PRIMITIVE_TYPE */
-                     + 2 /* VGT_INDEX_TYPE */
-                     + 2 /* NUM_INSTANCES */
-                     + 5 + 2; /* DRAW_INDEX */
-    }
-    else
-    {
-        total_emit =   3 /* VGT_PRIMITIVE_TYPE */
-                     + 2 /* VGT_INDEX_TYPE */
-                     + 2 /* NUM_INSTANCES */
-                     + num_indices + 3; /* DRAW_INDEX_IMMD */
-    }
-
-    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
-    // prim
     SETfield(vgt_primitive_type, type,
              VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
-    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
-    R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
-    R600_OUT_BATCH(vgt_primitive_type);
-    // index type
     SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
-    if(GL_TRUE == bUseDrawIndex)
+    if(GL_TRUE != context->ind_buf.is_32bit)
     {
-        if(GL_TRUE != context->ind_buf.is_32bit)
-        {
             SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
-        }
     }
+    vgt_num_indices = num_indices;
+    SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+    total_emit =   3 /* VGT_PRIMITIVE_TYPE */
+                 + 2 /* VGT_INDEX_TYPE */
+                 + 2 /* NUM_INSTANCES */
+                 + 5 + 2; /* DRAW_INDEX */
+
+    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
     R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
     R600_OUT_BATCH(vgt_index_type);
-
     // num instances
     R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
     R600_OUT_BATCH(1);
-
     // draw packet
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
+    R600_OUT_BATCH(context->ind_buf.bo_offset);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(vgt_num_indices);
+    R600_OUT_BATCH(vgt_draw_initiator);
+    R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
+                         context->ind_buf.bo,
+                         context->ind_buf.bo_offset,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    BATCH_LOCALS(&context->radeon);
+    int type, i;
+    uint32_t num_indices, total_emit = 0;
+    uint32_t vgt_draw_initiator = 0;
+    uint32_t vgt_index_type     = 0;
+    uint32_t vgt_primitive_type = 0;
+    uint32_t vgt_num_indices    = 0;
+
+    type = r700PrimitiveType(prim);
+    num_indices = r700NumVerts(end - start, prim);
+
+    radeon_print(RADEON_RENDER, RADEON_TRACE,
+                 "%s type %x num_indices %d\n",
+                 __func__, type, num_indices);
+
+    if (type < 0 || num_indices <= 0)
+        return;
+
+    SETfield(vgt_primitive_type, type,
+             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+    if (num_indices > 0xffff)
+    {
+        SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+    else
+    {
+        SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+
     vgt_num_indices = num_indices;
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
 
-    if(GL_TRUE == bUseDrawIndex)
+    if (start == 0)
     {
-        SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+        SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
     }
     else
     {
-        SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+        if (num_indices > 0xffff)
+        {
+            total_emit += num_indices;
+        }
+        else
+        {
+            total_emit += (num_indices + 1) / 2;
+        }
+        SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
     }
-    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+    total_emit +=   3 /* VGT_PRIMITIVE_TYPE */
+                  + 2 /* VGT_INDEX_TYPE */
+                  + 2 /* NUM_INSTANCES */
+                  + 3; /* DRAW */
 
-    if(GL_TRUE == bUseDrawIndex)
+    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+    R600_OUT_BATCH(vgt_index_type);
+    // num instances
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+    R600_OUT_BATCH(1);
+    // draw packet
+    if(start == 0)
     {
-        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
-        R600_OUT_BATCH(context->ind_buf.bo_offset);
-        R600_OUT_BATCH(0);
+        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
         R600_OUT_BATCH(vgt_num_indices);
         R600_OUT_BATCH(vgt_draw_initiator);
-        R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
-                             context->ind_buf.bo,
-                             context->ind_buf.bo_offset,
-                             RADEON_GEM_DOMAIN_GTT, 0, 0);
     }
     else
     {
-        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
-        R600_OUT_BATCH(vgt_num_indices);
-        R600_OUT_BATCH(vgt_draw_initiator);
-
-        for (i = start; i < (start + num_indices); i++)
-        {
-            if(vb->Elts)
-            {
-                R600_OUT_BATCH(vb->Elts[i]);
-            }
-            else
+        if (num_indices > 0xffff)
+        {
+            R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+            R600_OUT_BATCH(vgt_num_indices);
+            R600_OUT_BATCH(vgt_draw_initiator);
+            for (i = start; i < (start + num_indices); i++)
             {
-                R600_OUT_BATCH(i);
+                R600_OUT_BATCH(i);
            }
-        }
+        }
+        else
+        {
+            R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1)));
+            R600_OUT_BATCH(vgt_num_indices);
+            R600_OUT_BATCH(vgt_draw_initiator);
+            for (i = start; i < (start + num_indices); i += 2)
+            {
+                if ((i + 1) == (start + num_indices))
+                {
+                    R600_OUT_BATCH(i);
+                }
+                else
+                {
+                    R600_OUT_BATCH(((i + 1) << 16) | (i));
+                }
+            }
+        }
     }
 
     END_BATCH();
@@ -391,7 +440,12 @@ static GLuint r700PredictRenderSize(GLcontext* ctx,
     else
     {
         for (i = 0; i < nr_prims; ++i)
         {
-            dwords += prim[i].count + 10;
+            if (prim[i].start == 0)
+                dwords += 10;
+            else if (prim[i].count > 0xffff)
+                dwords += prim[i].count + 10;
+            else
+                dwords += ((prim[i].count + 1) / 2) + 10;
         }
     }
@@ -472,6 +526,9 @@ static void r700ConvertAttrib(GLcontext *ctx, int count,
     radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
                          sizeof(GLfloat) * input->Size * count, 32);
+
+    radeon_bo_map(attr->bo, 1);
+
     dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
 
     assert(src_ptr != NULL);
@@ -505,6 +562,8 @@ static void r700ConvertAttrib(GLcontext *ctx, int count,
             break;
     }
 
+    radeon_bo_unmap(attr->bo);
+
     if (mapped_named_bo)
     {
         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
@@ -523,6 +582,8 @@ static void r700AlignDataToDword(GLcontext *ctx,
     radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
 
+    radeon_bo_map(attr->bo, 1);
+
     if (!input->BufferObj->Pointer)
     {
         ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
@@ -542,6 +603,7 @@ static void r700AlignDataToDword(GLcontext *ctx,
         }
     }
 
+    radeon_bo_unmap(attr->bo);
     if (mapped_named_bo)
     {
         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
@@ -610,32 +672,33 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
             radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
                                  &context->stream_desc[index].bo_offset, size, 32);
+
+            radeon_bo_map(context->stream_desc[index].bo, 1);
             assert(context->stream_desc[index].bo->ptr != NULL);
+
             dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
                                            context->stream_desc[index].bo_offset);
 
             switch (context->stream_desc[index].dwords)
             {
             case 1:
-                radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
-                context->stream_desc[index].stride = 4;
+                radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                 break;
             case 2:
                 radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
-                context->stream_desc[index].stride = 8;
                 break;
             case 3:
                 radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
-                context->stream_desc[index].stride = 12;
                 break;
            case 4:
                 radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
-                context->stream_desc[index].stride = 16;
                 break;
            default:
                 assert(0);
                 break;
            }
+            radeon_bo_unmap(context->stream_desc[index].bo);
        }
    }
@@ -707,6 +770,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
         radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                              &context->ind_buf.bo_offset, size, 4);
+        radeon_bo_map(context->ind_buf.bo, 1);
         assert(context->ind_buf.bo->ptr != NULL);
         out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
@@ -720,6 +784,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
             *out++ = in[i];
         }
 
+        radeon_bo_unmap(context->ind_buf.bo);
 #if MESA_BIG_ENDIAN
     }
     else
@@ -730,6 +795,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
         radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                              &context->ind_buf.bo_offset, size, 4);
+        radeon_bo_map(context->ind_buf.bo, 1);
         assert(context->ind_buf.bo->ptr != NULL);
         out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
@@ -742,6 +808,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
         {
             *out++ = in[i];
         }
+        radeon_bo_unmap(context->ind_buf.bo);
 #endif
     }
@@ -787,11 +854,13 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
         radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                              &context->ind_buf.bo_offset, size, 4);
+        radeon_bo_map(context->ind_buf.bo, 1);
         assert(context->ind_buf.bo->ptr != NULL);
         dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
 
         _mesa_memcpy(dst_ptr, src_ptr, size);
 
+        radeon_bo_unmap(context->ind_buf.bo);
         context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
         context->ind_buf.count = mesa_ind_buf->count;
@@ -839,7 +908,7 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
     r700SetScissor(context);
     r700SetupVertexProgram(ctx);
     r700SetupFragmentProgram(ctx);
-    r600UpdateTextureState(ctx);
+    r700UpdateShaderStates(ctx);
 
     GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims)
         + context->radeon.cmdbuf.cs->cdw;
@@ -852,10 +921,16 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
     radeon_debug_add_indent();
     for (i = 0; i < nr_prims; ++i)
     {
-        r700RunRenderPrimitive(ctx,
-                               prim[i].start,
-                               prim[i].start + prim[i].count,
-                               prim[i].mode);
+        if (context->ind_buf.bo)
+            r700RunRenderPrimitive(ctx,
+                                   prim[i].start,
+                                   prim[i].start + prim[i].count,
+                                   prim[i].mode);
+        else
+            r700RunRenderPrimitiveImmediate(ctx,
+                                            prim[i].start,
+                                            prim[i].start + prim[i].count,
+                                            prim[i].mode);
     }
     radeon_debug_remove_indent();
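
Two details of the new 16-bit immediate path are easy to miss: indices go out two per dword, with index i in the low half and i + 1 in the high half, and an odd count leaves the final index alone in the low half. That is why both the DRAW_INDEX_IMMD packet length and the size prediction use (num_indices + 1) / 2. A minimal standalone sketch of that packing, using a hypothetical pack_indices_16() helper that is not part of the driver:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical helper mirroring the 16-bit DRAW_INDEX_IMMD loop above:
     * index i in the low half of each dword, i + 1 in the high half, and an
     * odd trailing index alone in the low half. Returns the number of dwords
     * written, i.e. (num_indices + 1) / 2. */
    static unsigned pack_indices_16(uint32_t *out, int start, int num_indices)
    {
        unsigned n = 0;
        int i;

        for (i = start; i < (start + num_indices); i += 2)
        {
            if ((i + 1) == (start + num_indices))
                out[n++] = (uint32_t)i;                             /* odd tail */
            else
                out[n++] = ((uint32_t)(i + 1) << 16) | (uint32_t)i; /* packed pair */
        }
        return n;
    }

    int main(void)
    {
        uint32_t buf[3];
        unsigned n = pack_indices_16(buf, 4, 5); /* indices 4..8 */
        unsigned k;

        for (k = 0; k < n; k++)
            printf("0x%08x\n", (unsigned)buf[k]); /* 0x00050004 0x00070006 0x00000008 */
        return 0;
    }

The casts only make the shift explicit; the driver's R600_OUT_BATCH(((i + 1) << 16) | (i)) relies on the same dword layout.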
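The r700PredictRenderSize hunk keeps the estimate in step with r700RunRenderPrimitiveImmediate: a draw with start == 0 becomes DRAW_INDEX_AUTO and costs 10 fixed dwords (3 for VGT_PRIMITIVE_TYPE, 2 for INDEX_TYPE, 2 for NUM_INSTANCES, 3 for the draw packet), while a non-zero start also pays for the inline indices, one dword per 32-bit index or one per pair of 16-bit indices. A hedged restatement of that accounting as a standalone helper (hypothetical name, not driver code):

    #include <stdio.h>

    /* Per-primitive dword estimate matching the emit paths above:
     * 10 fixed dwords plus the immediate index payload when start != 0. */
    static unsigned predict_prim_dwords(unsigned start, unsigned count)
    {
        if (start == 0)
            return 10;                      /* DRAW_INDEX_AUTO, no index payload */
        else if (count > 0xffff)
            return count + 10;              /* one 32-bit index per dword */
        else
            return ((count + 1) / 2) + 10;  /* two 16-bit indices per dword */
    }

    int main(void)
    {
        printf("%u %u %u\n",
               predict_prim_dwords(0, 100),   /* 10 */
               predict_prim_dwords(4, 70000), /* 70010 */
               predict_prim_dwords(4, 5));    /* 13 */
        return 0;
    }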