From f855e16afa086edfc2f03360a27f66c955a6c208 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 5 Jun 2010 05:07:41 +0200 Subject: r300/compiler: implement SIN+COS+SCS for vertex shaders --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 5 +- .../drivers/dri/r300/compiler/radeon_program_alu.c | 87 +++++++++++++++++----- .../drivers/dri/r300/compiler/radeon_program_alu.h | 5 ++ 3 files changed, 76 insertions(+), 21 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index dd307856a7..507b2e532f 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -360,6 +360,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi switch (vpi->Opcode) { case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; @@ -378,6 +379,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; default: @@ -605,8 +607,9 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_scale_vertex, 0 } }; - radeonLocalTransform(&compiler->Base, 1, transformations); + radeonLocalTransform(&compiler->Base, 2, transformations); } debug_program_log(compiler, "after native rewrite"); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index d14de79565..c922d3d9a4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -848,6 +848,34 @@ int radeonTransformTrigSimple(struct radeon_compiler* c, return 1; } +static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, + struct rc_instruction *inst, + unsigned srctmp) +{ + if (inst->U.I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->U.I.DstReg; + + if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { + moddst.WriteMask = RC_MASK_X; + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { + moddst.WriteMask = RC_MASK_Y; + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + } + + rc_remove_instruction(inst); +} + /** * Transform the trigonometric functions COS, SIN, and SCS @@ -880,29 +908,48 @@ int radeonTransformTrigScale(struct radeon_compiler* c, emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), srcreg(RC_FILE_TEMPORARY, temp)); - if (inst->U.I.Opcode == RC_OPCODE_COS) { - emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); - } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, - inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); - } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { - struct rc_dst_register moddst = inst->U.I.DstReg; + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} - if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { - moddst.WriteMask = RC_MASK_X; - emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, - srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); - } - if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { - moddst.WriteMask = RC_MASK_Y; - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, - srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); - } - } +/** + * Transform the trigonometric functions COS, SIN, and SCS + * so that the input to COS and SIN is always in the range [-PI, PI]. + * SCS is replaced by one COS and one SIN instruction. + */ +int r300_transform_trig_scale_vertex(struct radeon_compiler *c, + struct rc_instruction *inst, + void *unused) +{ + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; - rc_remove_instruction(inst); + /* Repeat x in the range [-PI, PI]: + * + * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI + */ + + static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; + unsigned int temp; + unsigned int constant; + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); + r300_transform_SIN_COS_SCS(c, inst, temp); return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h index 7cb5f84b7f..77d444476f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -50,6 +50,11 @@ int radeonTransformTrigScale( struct rc_instruction * inst, void*); +int r300_transform_trig_scale_vertex( + struct radeon_compiler *c, + struct rc_instruction *inst, + void*); + int radeonTransformDeriv( struct radeon_compiler * c, struct rc_instruction * inst, -- cgit v1.2.3