From 7904c9fad4c2cb2a4153258a9e86e530a0330a78 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sun, 6 Jul 2008 16:58:51 +0200 Subject: r500_fragprog: Transform trigonometric functions in first pass --- src/mesa/drivers/dri/r300/r500_fragprog.c | 7 +- src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 91 ++------------------------ src/mesa/drivers/dri/r300/radeon_program_alu.c | 52 +++++++++++++++ src/mesa/drivers/dri/r300/radeon_program_alu.h | 5 ++ 4 files changed, 65 insertions(+), 90 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 1cdb065354..9bb92d3ba4 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -318,12 +318,13 @@ void r500TranslateFragmentShader(r300ContextPtr r300, insert_WPOS_trailer(&compiler); - struct radeon_program_transformation transformations[2] = { + struct radeon_program_transformation transformations[3] = { { &transform_TEX, &compiler }, - { &radeonTransformALU, 0 } + { &radeonTransformALU, 0 }, + { &radeonTransformTrigScale, 0 } }; radeonLocalTransform(r300->radeon.glCtx, compiler.program, - 2, transformations); + 3, transformations); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Compiler: after all transformations:\n"); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 8c900941c4..4f65803953 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -156,17 +156,6 @@ struct r500_pfs_compile_state { #define R500_WRITEMASK_AB 0xC #define R500_WRITEMASK_ARGB 0xF -/* 1/(2pi), needed for quick modulus in trig insts - * Thanks to glisse for pointing out how to do it! */ -static const GLfloat RCP_2PI[] = {0.15915494309189535, - 0.15915494309189535, - 0.15915494309189535, - 0.15915494309189535}; - -static const GLfloat LIT[] = {127.999999, - 127.999999, - 127.999999, - -127.999999}; static const struct prog_dst_register dstreg_template = { .File = PROGRAM_TEMPORARY, @@ -476,12 +465,6 @@ static int emit_alu(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alph return _helper_emit_alu(cs, rgbop, alphaop, dst.File, dst.Index, dst.WriteMask); } -static int emit_alu_temp(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, int dst, int writemask) -{ - return _helper_emit_alu(cs, rgbop, alphaop, - PROGRAM_TEMPORARY, dst - cs->compiler->code->temp_reg_offset, writemask); -} - /** * Set an instruction's source 0 (both RGB and ALPHA) to the given hardware index. */ @@ -612,56 +595,6 @@ static int emit_sop(struct r500_pfs_compile_state *cs, } -/** - * Emit trigonometric function COS, SIN, SCS - */ -static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) -{ - int ip; - struct prog_dst_register temp = dstreg_template; - temp.Index = get_temp(cs, 0); - temp.WriteMask = WRITEMASK_W; - - struct prog_src_register srcreg; - GLuint constant_swizzle; - - srcreg.File = PROGRAM_CONSTANT; - srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters, - RCP_2PI, 4, &constant_swizzle); - srcreg.Swizzle = constant_swizzle; - - /* temp = Input*(1/2pi) */ - ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp); - set_src0(cs, ip, fpi->SrcReg[0]); - set_src1(cs, ip, srcreg); - set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, make_sop_swizzle(fpi->SrcReg[0])); - set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, make_alpha_swizzle(srcreg)); - set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); - - /* temp = frac(dst) */ - ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, temp); - set_src0_direct(cs, ip, temp.Index); - set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W); - - /* Dest = trig(temp) */ - if (fpi->Opcode == OPCODE_COS) { - emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W); - } else if (fpi->Opcode == OPCODE_SIN) { - emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W); - } else if (fpi->Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = fpi->DstReg; - - if (fpi->DstReg.WriteMask & WRITEMASK_X) { - moddst.WriteMask = WRITEMASK_X; - emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W); - } - if (fpi->DstReg.WriteMask & WRITEMASK_Y) { - moddst.WriteMask = WRITEMASK_Y; - emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W); - } - } -} - static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) { PROG_CODE; GLuint src[3], dest = 0; @@ -693,7 +626,8 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction * set_argC_reg(cs, ip, 0, fpi->SrcReg[0]); break; case OPCODE_COS: - emit_trig(cs, fpi); + src[0] = make_src(cs, fpi->SrcReg[0]); + emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0])); break; case OPCODE_DP3: ip = emit_alu(cs, R500_ALU_RGBA_OP_DP3, R500_ALPHA_OP_DP, fpi->DstReg); @@ -713,21 +647,6 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction * src[0] = make_src(cs, fpi->SrcReg[0]); emit_sop(cs, R500_ALPHA_OP_EX2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0])); break; - case OPCODE_FLR: - dest = get_temp(cs, 0); - ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, dest, WRITEMASK_XYZW); - set_src0(cs, ip, fpi->SrcReg[0]); - set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); - - ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); - set_src0(cs, ip, fpi->SrcReg[0]); - set_src1_direct(cs, ip, dest); - set_argA_reg(cs, ip, 0, fpi->SrcReg[1]); - set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ONE); - set_argC(cs, ip, 1, - R500_SWIZ_RGB_RGB|(R500_SWIZ_MOD_NEG<<9), - SWIZZLE_W|(R500_SWIZ_MOD_NEG<<3)); - break; case OPCODE_FRC: ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, fpi->DstReg); set_src0(cs, ip, fpi->SrcReg[0]); @@ -787,11 +706,9 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction * emit_sop(cs, R500_ALPHA_OP_RSQ, fpi->DstReg, src[0], (make_sop_swizzle(fpi->SrcReg[0]) | (R500_SWIZ_MOD_ABS<<3)) & ~(R500_SWIZ_MOD_NEG<<3)); break; - case OPCODE_SCS: - emit_trig(cs, fpi); - break; case OPCODE_SIN: - emit_trig(cs, fpi); + src[0] = make_src(cs, fpi->SrcReg[0]); + emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0])); break; case OPCODE_KIL: case OPCODE_TEX: diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c index fa6a67f0c1..8daa94c726 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -556,3 +556,55 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, return GL_TRUE; } + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; + GLuint temp; + GLuint constant; + GLuint constant_swizzle; + + temp = radeonFindFreeTemporary(t); + constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); + + emit2(t->Program, OPCODE_MUL, dstregtmpmask(temp, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); + emit1(t->Program, OPCODE_FRC, dstregtmpmask(temp, WRITEMASK_W), + srcreg(PROGRAM_TEMPORARY, temp)); + + if (inst->Opcode == OPCODE_COS) { + emit1(t->Program, OPCODE_COS, inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SIN) { + emit1(t->Program, OPCODE_SIN, inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = inst->DstReg; + + if (inst->DstReg.WriteMask & WRITEMASK_X) { + moddst.WriteMask = WRITEMASK_X; + emit1(t->Program, OPCODE_COS, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + if (inst->DstReg.WriteMask & WRITEMASK_Y) { + moddst.WriteMask = WRITEMASK_Y; + emit1(t->Program, OPCODE_SIN, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + } + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h index 3fe6153fd8..ea9d5bb669 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.h @@ -40,4 +40,9 @@ GLboolean radeonTransformTrigSimple( struct prog_instruction*, void*); +GLboolean radeonTransformTrigScale( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + #endif /* __RADEON_PROGRAM_ALU_H_ */ -- cgit v1.2.3