From 394803b2cd7fd9d4c0a394eb6998ccafec19bcd3 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 18 Apr 2010 23:40:01 +0200 Subject: r300/compiler: lower CEIL --- .../drivers/dri/r300/compiler/radeon_opcodes.c | 7 +++++++ .../drivers/dri/r300/compiler/radeon_opcodes.h | 3 +++ .../drivers/dri/r300/compiler/radeon_program_alu.c | 24 +++++++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index ffc91241ab..d593b3e81a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -59,6 +59,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .NumSrcRegs = 1, .HasDstReg = 1 }, + { + .Opcode = RC_OPCODE_CEIL, + .Name = "CEIL", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, { .Opcode = RC_OPCODE_CMP, .Name = "CMP", diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 1c9b34df78..87a2e23084 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -47,6 +47,9 @@ typedef enum { * dst.x = floor(src.x), where dst must be an address register */ RC_OPCODE_ARL, + /** vec4 instruction: dst.c = ceil(src0.c) */ + RC_OPCODE_CEIL, + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ RC_OPCODE_CMP, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index fced31d6cb..05b874ba7c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -175,6 +175,26 @@ static void transform_ABS(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_CEIL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Assuming: + * ceil(x) = -floor(-x) + * + * After inlining floor: + * ceil(x) = -(-x-frac(-x)) + * + * After simplification: + * ceil(x) = x+frac(-x) + */ + + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + rc_remove_instruction(inst); +} + static void transform_DP3(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -458,7 +478,7 @@ static void transform_XPD(struct radeon_compiler* c, * no userData necessary. * * Eliminates the following ALU instructions: - * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD + * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD * using: * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * @@ -474,6 +494,7 @@ int radeonTransformALU( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; @@ -546,6 +567,7 @@ int r300_transform_vertex_alu( { switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; -- cgit v1.2.3