summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c23
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c41
8 files changed, 71 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index cc552aee17..37dafa7710 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -353,7 +353,7 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
}
}
- if (code->pixsize >= R300_PFS_NUM_TEMP_REGS)
+ if (code->pixsize >= compiler->max_temp_regs)
rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
if (compiler->Base.Error)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index c2d5dc27b4..d06429254d 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -156,10 +156,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
fflush(stderr);
}
- if (c->is_r500)
- rc_pair_regalloc(c, 128);
- else
- rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS);
+ rc_pair_regalloc(c, c->max_temp_regs);
if (c->Base.Error)
return;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index b0fb8e970b..b78d7d5715 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -433,19 +433,20 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
(inst >> 30));
fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
inst = code->inst[n].inst3;
- fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
+ fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
(inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
(inst >> 11) & 0x3,
(inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
- (inst >> 24) & 0x3);
+ (inst >> 24) & 0x3, (inst >> 29) & 0x3);
fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
inst = code->inst[n].inst4;
- fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
+ fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
(inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+ (inst >> 29) & 0x3,
(inst >> 31) & 0x1);
fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 4e84eefd65..b6dfe28def 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -190,6 +190,17 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r
return 0;
}
+/**
+ * NOP the specified instruction if it is not a texture lookup.
+ */
+static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
+{
+ PROG_CODE;
+
+ if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
+ code->inst[ip].inst0 |= R500_INST_NOP;
+ }
+}
/**
* Emit a paired ALU instruction.
@@ -205,6 +216,14 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
int ip = ++code->inst_end;
+ /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
+ if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
+ inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
+ if (ip > 0) {
+ alu_nop(c, ip - 1);
+ }
+ }
+
code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
@@ -252,8 +271,8 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
- code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
- code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
+ code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
+ code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
if (inst->WriteALUResult) {
code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 6bfda0574f..934ae28da5 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -83,6 +83,7 @@ struct r300_fragment_program_compiler {
struct rX00_fragment_program_code *code;
struct r300_fragment_program_external_state state;
unsigned is_r500;
+ unsigned max_temp_regs;
/* Register corresponding to the depthbuffer. */
unsigned OutputDepth;
/* Registers corresponding to the four colorbuffers. */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index c1c0181fac..9d289fce34 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -75,14 +75,14 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
{
.Opcode = RC_OPCODE_DDX,
.Name = "DDX",
- .NumSrcRegs = 1,
+ .NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DDY,
.Name = "DDY",
- .NumSrcRegs = 1,
+ .NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index fff5b0c217..3a26e7daaf 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -159,11 +159,6 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
int nargs = opcode->NumSrcRegs;
int i;
- /* Special case for DDX/DDY (MDH/MDV). */
- if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) {
- nargs++;
- }
-
for(i = 0; i < opcode->NumSrcRegs; ++i) {
int source;
if (needrgb && !istranscendent) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index b5c08aea49..f5b7d57eab 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -506,6 +506,46 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c,
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* There is no decent CMP available, so let's rig one up.
+ * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+ * The following sequence consumes two temps and three extra slots,
+ * but should be equivalent:
+ *
+ * SLT tmp0, src0, 0.0
+ * SGE tmp1, src0, 0.0
+ * MUL tmp0, tmp0, src1
+ * MAD dst, src2, tmp1, tmp0
+ *
+ * Yes, I know, I'm a mad scientist. ~ C. */
+ int tempreg0 = rc_find_free_temporary(c);
+ int tempreg1 = rc_find_free_temporary(c);
+
+ /* SLT tmp0, src0, 0.0 */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg0),
+ inst->U.I.SrcReg[0], builtin_zero);
+
+ /* SGE tmp1, src0, 0.0 */
+ emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg1),
+ inst->U.I.SrcReg[0], builtin_zero);
+
+ /* MUL tmp0, tmp0, src1 */
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg0),
+ srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]);
+
+ /* MAD dst, src2, tmp1, tmp0 */
+ emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0));
+
+ rc_remove_instruction(inst);
+}
+
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
@@ -517,6 +557,7 @@ int r300_transform_vertex_alu(
{
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+ case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;