diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
8 files changed, 71 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index cc552aee17..37dafa7710 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -353,7 +353,7 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi } } - if (code->pixsize >= R300_PFS_NUM_TEMP_REGS) + if (code->pixsize >= compiler->max_temp_regs) rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); if (compiler->Base.Error) diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index c2d5dc27b4..d06429254d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -156,10 +156,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) fflush(stderr); } - if (c->is_r500) - rc_pair_regalloc(c, 128); - else - rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS); + rc_pair_regalloc(c, c->max_temp_regs); if (c->Base.Error) return; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index b0fb8e970b..b78d7d5715 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -433,19 +433,20 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c) (inst >> 30)); fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); inst = code->inst[n].inst3; - fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), (inst >> 11) & 0x3, (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), - (inst >> 24) & 0x3); + (inst >> 24) & 0x3, (inst >> 29) & 0x3); fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); inst = code->inst[n].inst4; - fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 29) & 0x3, (inst >> 31) & 0x1); fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 4e84eefd65..b6dfe28def 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -190,6 +190,17 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r return 0; } +/** + * NOP the specified instruction if it is not a texture lookup. + */ +static void alu_nop(struct r300_fragment_program_compiler *c, int ip) +{ + PROG_CODE; + + if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { + code->inst[ip].inst0 |= R500_INST_NOP; + } +} /** * Emit a paired ALU instruction. @@ -205,6 +216,14 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair int ip = ++code->inst_end; + /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ + if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || + inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { + if (ip > 0) { + alu_nop(c, ip - 1); + } + } + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); @@ -252,8 +271,8 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); - code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); if (inst->WriteALUResult) { code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 6bfda0574f..934ae28da5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -83,6 +83,7 @@ struct r300_fragment_program_compiler { struct rX00_fragment_program_code *code; struct r300_fragment_program_external_state state; unsigned is_r500; + unsigned max_temp_regs; /* Register corresponding to the depthbuffer. */ unsigned OutputDepth; /* Registers corresponding to the four colorbuffers. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index c1c0181fac..9d289fce34 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -75,14 +75,14 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { { .Opcode = RC_OPCODE_DDX, .Name = "DDX", - .NumSrcRegs = 1, + .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_DDY, .Name = "DDY", - .NumSrcRegs = 1, + .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 }, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index fff5b0c217..3a26e7daaf 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -159,11 +159,6 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, int nargs = opcode->NumSrcRegs; int i; - /* Special case for DDX/DDY (MDH/MDV). */ - if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) { - nargs++; - } - for(i = 0; i < opcode->NumSrcRegs; ++i) { int source; if (needrgb && !istranscendent) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index b5c08aea49..f5b7d57eab 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -506,6 +506,46 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c, inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } +static void transform_r300_vertex_CMP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* There is no decent CMP available, so let's rig one up. + * CMP is defined as dst = src0 < 0.0 ? src1 : src2 + * The following sequence consumes two temps and three extra slots, + * but should be equivalent: + * + * SLT tmp0, src0, 0.0 + * SGE tmp1, src0, 0.0 + * MUL tmp0, tmp0, src1 + * MAD dst, src2, tmp1, tmp0 + * + * Yes, I know, I'm a mad scientist. ~ C. */ + int tempreg0 = rc_find_free_temporary(c); + int tempreg1 = rc_find_free_temporary(c); + + /* SLT tmp0, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstreg(RC_FILE_TEMPORARY, tempreg0), + inst->U.I.SrcReg[0], builtin_zero); + + /* SGE tmp1, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + dstreg(RC_FILE_TEMPORARY, tempreg1), + inst->U.I.SrcReg[0], builtin_zero); + + /* MUL tmp0, tmp0, src1 */ + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstreg(RC_FILE_TEMPORARY, tempreg0), + srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]); + + /* MAD dst, src2, tmp1, tmp0 */ + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0)); + + rc_remove_instruction(inst); +} + /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. @@ -517,6 +557,7 @@ int r300_transform_vertex_alu( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; |