diff options
Diffstat (limited to 'src/mesa/drivers/dri/r600')
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_blit.c | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.c | 149 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_chip.c | 37 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_clear.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_render.c | 27 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_vertprog.c | 6 |
6 files changed, 194 insertions, 48 deletions
diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c index 172f85eb26..619678214f 100644 --- a/src/mesa/drivers/dri/r600/r600_blit.c +++ b/src/mesa/drivers/dri/r600/r600_blit.c @@ -390,13 +390,20 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(12); + BEGIN_BATCH_NO_AUTOSTATE(9); R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size); R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view); - R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info); R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0); END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + COMMIT_BATCH(); } @@ -1447,7 +1454,7 @@ set_default_state(context_t *context) SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit); } - BEGIN_BATCH_NO_AUTOSTATE(117); + BEGIN_BATCH_NO_AUTOSTATE(114); R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6); R600_OUT_BATCH(sq_config); R600_OUT_BATCH(sq_gpr_resource_mgmt_1); @@ -1477,7 +1484,6 @@ set_default_state(context_t *context) (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift)); R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0); R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0); - R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0); R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0); R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask)); R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask)); @@ -1607,7 +1613,7 @@ unsigned r600_blit(GLcontext *ctx, /* Flush is needed to make sure that source buffer has correct data */ radeonFlush(ctx); - rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__); + rcommonEnsureCmdBufSpace(&context->radeon, 305, __FUNCTION__); /* load shaders */ load_shaders(context->radeon.glCtx); @@ -1616,7 +1622,7 @@ unsigned r600_blit(GLcontext *ctx, return GL_FALSE; /* set clear state */ - /* 117 */ + /* 114 */ set_default_state(context); /* shaders */ @@ -1632,7 +1638,7 @@ unsigned r600_blit(GLcontext *ctx, set_tex_sampler(context); /* dst */ - /* 27 */ + /* 31 */ set_render_target(context, dst_bo, dst_mesaformat, dst_pitch, dst_width, dst_height, dst_offset); /* scissors */ diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 99a33df4fc..9c954cbf70 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -275,7 +275,10 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) case 2: format = FMT_8_8; break; case 3: - format = FMT_8_8_8; break; + /* for some (small/unaligned) strides using 4 comps works + * better, probably same as GL_SHORT below + * test piglit/draw-vertices */ + format = FMT_8_8_8_8; break; case 4: format = FMT_8_8_8_8; break; default: @@ -2872,25 +2875,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { + /* + * r600 - trunc to -PI..PI range + * r700 - normalize by dividing by 2PI + * see fdo bug 27901 + */ + int tmp; checkop1(pAsm); tmp = gethelpr(pAsm); - pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + pAsm->D2.dst2.literal_slots = 1; pAsm->C[0].f = 1/(3.1415926535 * 2); - pAsm->C[1].f = 0.0F; - next_ins(pAsm); + pAsm->C[1].f = 0.5f; + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + pAsm->D2.dst2.literal_slots = 1; + + if (pAsm->bR6xx) + { + pAsm->C[0].f = 3.1415926535897f * 2.0f; + pAsm->C[1].f = -3.1415926535897f; + } + else + { + pAsm->C[0].f = 1.0f; + pAsm->C[1].f = -0.5f; + } + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; @@ -4030,22 +4100,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) checkop1(pAsm); tmp = gethelpr(pAsm); - /* tmp.x = src /2*PI */ - pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + pAsm->D2.dst2.literal_slots = 1; pAsm->C[0].f = 1/(3.1415926535 * 2); - pAsm->C[1].f = 0.0F; + pAsm->C[1].f = 0.5F; - next_ins(pAsm); + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + pAsm->D2.dst2.literal_slots = 1; + + if(pAsm->bR6xx) { + pAsm->C[0].f = 3.1415926535897f * 2.0f; + pAsm->C[1].f = -3.1415926535897f; + } else { + pAsm->C[0].f = 1.0f; + pAsm->C[1].f = -0.5f; + } + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } // COS dst.x, a.x pAsm->D.dst.opcode = SQ_OP2_INST_COS; @@ -6473,7 +6600,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, * results are undefined anyway */ if(export_count == 0) { - Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE); + Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE); } if(pR700AsmCode->cf_last_export_ptr != NULL) diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index cefda3ac4b..1e955b93b2 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -617,18 +617,25 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a r700SetDepthTarget(context); - BEGIN_BATCH_NO_AUTOSTATE(8 + 2); + BEGIN_BATCH_NO_AUTOSTATE(7 + 2); R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2); R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All); R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All); - R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2); + R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 1); R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All); - R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All, rrb->bo, r700->DB_DEPTH_BASE.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(DB_DEPTH_INFO, 1); + R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); + R600_OUT_BATCH_RELOC(r700->DB_DEPTH_INFO.u32All, + rrb->bo, + r700->DB_DEPTH_INFO.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) && (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) { @@ -687,27 +694,35 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1); R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All); - R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_TILE.u32All, rrb->bo, - r700->render_target[id].CB_COLOR0_BASE.u32All, + r700->render_target[id].CB_COLOR0_TILE.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1); R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All); - R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_FRAG.u32All, rrb->bo, - r700->render_target[id].CB_COLOR0_BASE.u32All, + r700->render_target[id].CB_COLOR0_FRAG.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(12); + BEGIN_BATCH_NO_AUTOSTATE(9); R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All); - R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All); END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All); + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_INFO.u32All, + rrb->bo, + r700->render_target[id].CB_COLOR0_INFO.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + + END_BATCH(); + COMMIT_BATCH(); } @@ -1567,7 +1582,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(sq, always, 34, r700SendSQConfig); ALLOC_STATE(db, always, 17, r700SendDBState); ALLOC_STATE(stencil, always, 4, r700SendStencilState); - ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState); + ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState); ALLOC_STATE(sc, always, 15, r700SendSCState); ALLOC_STATE(scissor, always, 22, r700SendScissorState); ALLOC_STATE(aa, always, 12, r700SendAAState); @@ -1578,7 +1593,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(poly, always, 10, r700SendPolyState); ALLOC_STATE(cb, cb, 18, r700SendCBState); ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); - ALLOC_STATE(cb_target, always, 29, r700SendRenderTargetState); + ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState); ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); ALLOC_STATE(sx, always, 9, r700SendSXState); diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index 09c48565b6..d1008f28b9 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -48,6 +48,7 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask) void r700Clear(GLcontext * ctx, GLbitfield mask) { context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]); GLbitfield swrast_mask = 0, tri_mask = 0; @@ -60,6 +61,8 @@ void r700Clear(GLcontext * ctx, GLbitfield mask) context->radeon.front_buffer_dirty = GL_TRUE; } + radeon_prepare_render(radeon); + if( GL_TRUE == r700ClearFast(context, mask) ) { return; diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 1929b7cc12..ba55f38e05 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -625,11 +625,11 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; - if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT #if MESA_BIG_ENDIAN - getTypeSize(input[i]->Type) != 4 || + || getTypeSize(input[i]->Type) != 4 #endif - stride < 4) + ) { r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); } @@ -637,19 +637,10 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input { if (input[i]->BufferObj->Name) { - if (stride % 4 != 0) - { - assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); - r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); - context->stream_desc[index].is_named_bo = GL_FALSE; - } - else - { - context->stream_desc[index].stride = input[i]->StrideB; - context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; - context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; - context->stream_desc[index].is_named_bo = GL_TRUE; - } + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; } else { @@ -977,6 +968,10 @@ static void r700DrawPrims(GLcontext *ctx, { GLboolean retval = GL_FALSE; + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + radeon_prepare_render(radeon); + /* This check should get folded into just the places that * min/max index are really needed. */ diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 137f3007ce..6a2a09eaf1 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -461,11 +461,11 @@ static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const s stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; - if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT #if MESA_BIG_ENDIAN - getTypeSize(input->Type) != 4 || + || getTypeSize(input->Type) != 4 #endif - stride < 4) + ) { pStreamDesc->type = GL_FLOAT; |