/* * Copyright 2008 Corbin Simpson * Joakim Sindholt * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r5xx_fs.h" static INLINE unsigned r5xx_fix_swiz(unsigned s) { /* For historical reasons, the swizzle values x, y, z, w, and 0 are * equivalent to the actual machine code, but 1 is not. Thus, we just * adjust it a bit... */ if (s == TGSI_EXTSWIZZLE_ONE) { return R500_SWIZZLE_ONE; } else { return s; } } static uint32_t r5xx_rgba_swiz(struct tgsi_full_src_register* reg) { if (reg->SrcRegister.Extended) { return r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) | (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) | (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) | (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9); } else { return reg->SrcRegister.SwizzleX | (reg->SrcRegister.SwizzleY << 3) | (reg->SrcRegister.SwizzleZ << 6) | (reg->SrcRegister.SwizzleW << 9); } } static uint32_t r5xx_strq_swiz(struct tgsi_full_src_register* reg) { return reg->SrcRegister.SwizzleX | (reg->SrcRegister.SwizzleY << 2) | (reg->SrcRegister.SwizzleZ << 4) | (reg->SrcRegister.SwizzleW << 6); } static INLINE uint32_t r5xx_rgb_swiz(struct tgsi_full_src_register* reg) { /* Only the first 9 bits... */ return (r5xx_rgba_swiz(reg) & 0x1ff) | (reg->SrcRegister.Negate ? (1 << 9) : 0) | (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0); } static INLINE uint32_t r5xx_alpha_swiz(struct tgsi_full_src_register* reg) { /* Only the last 3 bits... */ return (r5xx_rgba_swiz(reg) >> 9) | (reg->SrcRegister.Negate ? (1 << 9) : 0) | (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0); } static INLINE uint32_t r5xx_rgba_op(unsigned op) { switch (op) { case TGSI_OPCODE_COS: case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: case TGSI_OPCODE_SIN: return R500_ALU_RGBA_OP_SOP; case TGSI_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; case TGSI_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; case TGSI_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; case TGSI_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; case TGSI_OPCODE_DP4: case TGSI_OPCODE_DPH: return R500_ALU_RGBA_OP_DP4; case TGSI_OPCODE_ABS: case TGSI_OPCODE_CMP: case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: return R500_ALU_RGBA_OP_CMP; case TGSI_OPCODE_ADD: case TGSI_OPCODE_MAD: case TGSI_OPCODE_MUL: case TGSI_OPCODE_SUB: return R500_ALU_RGBA_OP_MAD; default: return 0; } } static INLINE uint32_t r5xx_alpha_op(unsigned op) { switch (op) { case TGSI_OPCODE_COS: return R500_ALPHA_OP_COS; case TGSI_OPCODE_EX2: return R500_ALPHA_OP_EX2; case TGSI_OPCODE_LG2: return R500_ALPHA_OP_LN2; case TGSI_OPCODE_RCP: return R500_ALPHA_OP_RCP; case TGSI_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; case TGSI_OPCODE_FRC: return R500_ALPHA_OP_FRC; case TGSI_OPCODE_SIN: return R500_ALPHA_OP_SIN; case TGSI_OPCODE_DDX: return R500_ALPHA_OP_MDH; case TGSI_OPCODE_DDY: return R500_ALPHA_OP_MDV; case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: case TGSI_OPCODE_DPH: return R500_ALPHA_OP_DP; case TGSI_OPCODE_ABS: case TGSI_OPCODE_CMP: case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: return R500_ALPHA_OP_CMP; case TGSI_OPCODE_ADD: case TGSI_OPCODE_MAD: case TGSI_OPCODE_MUL: case TGSI_OPCODE_SUB: return R500_ALPHA_OP_MAD; default: return 0; } } static INLINE uint32_t r5xx_tex_op(unsigned op) { switch (op) { case TGSI_OPCODE_KIL: return R500_TEX_INST_TEXKILL; case TGSI_OPCODE_TEX: return R500_TEX_INST_LD; case TGSI_OPCODE_TXB: return R500_TEX_INST_LODBIAS; case TGSI_OPCODE_TXP: return R500_TEX_INST_PROJ; default: return 0; } } /* Setup an ALU operation. */ static INLINE void r5xx_emit_maths(struct r5xx_fragment_shader* fs, struct r300_fs_asm* assembler, struct tgsi_full_src_register* src, struct tgsi_full_dst_register* dst, unsigned op, unsigned count) { int i = fs->instruction_count; if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { fs->instructions[i].inst0 = R500_INST_TYPE_OUT; if (r300_fs_is_depr(assembler, dst)) { fs->instructions[i].inst4 = R500_W_OMASK; } else { fs->instructions[i].inst0 |= R500_ALU_OMASK(dst->DstRegister.WriteMask); } } else { fs->instructions[i].inst0 = R500_INST_TYPE_ALU | R500_ALU_WMASK(dst->DstRegister.WriteMask); } fs->instructions[i].inst0 |= R500_INST_TEX_SEM_WAIT; fs->instructions[i].inst4 |= R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); fs->instructions[i].inst5 = R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); switch (count) { case 3: fs->instructions[i].inst1 = R500_RGB_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister)); fs->instructions[i].inst2 = R500_ALPHA_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister)); fs->instructions[i].inst5 |= R500_ALU_RGBA_SEL_C_SRC2 | R500_SWIZ_RGBA_C(r5xx_rgb_swiz(&src[2])) | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | R500_SWIZ_ALPHA_C(r5xx_alpha_swiz(&src[2])); case 2: fs->instructions[i].inst1 |= R500_RGB_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); fs->instructions[i].inst2 |= R500_ALPHA_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); fs->instructions[i].inst3 = R500_ALU_RGB_SEL_B_SRC1 | R500_SWIZ_RGB_B(r5xx_rgb_swiz(&src[1])); fs->instructions[i].inst4 |= R500_ALPHA_SEL_B_SRC1 | R500_SWIZ_ALPHA_B(r5xx_alpha_swiz(&src[1])); case 1: case 0: default: fs->instructions[i].inst1 |= R500_RGB_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister)); fs->instructions[i].inst2 |= R500_ALPHA_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister)); fs->instructions[i].inst3 |= R500_ALU_RGB_SEL_A_SRC0 | R500_SWIZ_RGB_A(r5xx_rgb_swiz(&src[0])); fs->instructions[i].inst4 |= R500_ALPHA_SEL_A_SRC0 | R500_SWIZ_ALPHA_A(r5xx_alpha_swiz(&src[0])); break; } fs->instructions[i].inst4 |= r5xx_alpha_op(op); fs->instructions[i].inst5 |= r5xx_rgba_op(op); fs->instruction_count++; } static INLINE void r5xx_emit_tex(struct r5xx_fragment_shader* fs, struct r300_fs_asm* assembler, struct tgsi_full_src_register* src, struct tgsi_full_dst_register* dst, uint32_t op) { int i = fs->instruction_count; fs->instructions[i].inst0 = R500_INST_TYPE_TEX | R500_TEX_WMASK(dst->DstRegister.WriteMask) | R500_INST_TEX_SEM_WAIT; fs->instructions[i].inst1 = R500_TEX_ID(0) | R500_TEX_SEM_ACQUIRE | //R500_TEX_IGNORE_UNCOVERED | r5xx_tex_op(op); fs->instructions[i].inst2 = R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) | R500_SWIZ_TEX_STRQ(r5xx_strq_swiz(src)) | R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { fs->instructions[i].inst2 |= R500_TEX_DST_ADDR(assembler->temp_count + assembler->temp_offset); fs->instruction_count++; /* Setup and emit a MOV. */ src[0].SrcRegister.Index = assembler->temp_count; src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; src[1] = src[0]; src[2] = r300_constant_zero; r5xx_emit_maths(fs, assembler, src, dst, TGSI_OPCODE_MOV, 3); } else { fs->instruction_count++; } } void r5xx_fs_finalize(struct r5xx_fragment_shader* fs, struct r300_fs_asm* assembler) { /* XXX should this just go with OPCODE_END? */ fs->instructions[fs->instruction_count - 1].inst0 |= R500_INST_LAST; } void r5xx_fs_instruction(struct r5xx_fragment_shader* fs, struct r300_fs_asm* assembler, struct tgsi_full_instruction* inst) { /* Switch between opcodes. When possible, prefer using the official * AMD/ATI names for opcodes, please, as it facilitates using the * documentation. */ switch (inst->Instruction.Opcode) { /* XXX trig needs extra prep */ case TGSI_OPCODE_COS: case TGSI_OPCODE_SIN: /* The simple scalar ops. */ case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: /* Copy red swizzle to alpha for src0 */ inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX; inst->FullSrcRegisters[0].SrcRegister.SwizzleW = inst->FullSrcRegisters[0].SrcRegister.SwizzleX; /* Fall through */ case TGSI_OPCODE_DDX: case TGSI_OPCODE_DDY: case TGSI_OPCODE_FRC: r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1); break; /* The dot products. */ case TGSI_OPCODE_DPH: /* Set alpha swizzle to one for src0 */ if (!inst->FullSrcRegisters[0].SrcRegister.Extended) { inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE; inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX = inst->FullSrcRegisters[0].SrcRegister.SwizzleX; inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY = inst->FullSrcRegisters[0].SrcRegister.SwizzleY; inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ = inst->FullSrcRegisters[0].SrcRegister.SwizzleZ; } inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ONE; /* Fall through */ case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2); break; /* Simple three-source operations. */ case TGSI_OPCODE_CMP: /* Swap src0 and src2 */ inst->FullSrcRegisters[3] = inst->FullSrcRegisters[2]; inst->FullSrcRegisters[2] = inst->FullSrcRegisters[0]; inst->FullSrcRegisters[0] = inst->FullSrcRegisters[3]; r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; /* The MAD variants. */ case TGSI_OPCODE_SUB: /* Just like ADD, but flip the negation on src1 first */ inst->FullSrcRegisters[1].SrcRegister.Negate = !inst->FullSrcRegisters[1].SrcRegister.Negate; /* Fall through */ case TGSI_OPCODE_ADD: /* Force src0 to one, move all registers over */ inst->FullSrcRegisters[2] = inst->FullSrcRegisters[1]; inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; inst->FullSrcRegisters[0] = r300_constant_one; r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; case TGSI_OPCODE_MUL: /* Force our src2 to zero */ inst->FullSrcRegisters[2] = r300_constant_zero; r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; case TGSI_OPCODE_MAD: r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; /* The MOV variants. */ case TGSI_OPCODE_ABS: /* Set absolute value modifiers. */ inst->FullSrcRegisters[0].SrcRegisterExtMod.Absolute = TRUE; /* Fall through */ case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: /* src0 -> src1 and src2 forced to zero */ inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; inst->FullSrcRegisters[2] = r300_constant_zero; r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; /* The compound and hybrid insts. */ case TGSI_OPCODE_LRP: /* LRP DST A, B, C -> MAD TMP -A, C, C; MAD DST A, B, TMP */ inst->FullSrcRegisters[3] = inst->FullSrcRegisters[1]; inst->FullSrcRegisters[1] = inst->FullSrcRegisters[2]; inst->FullSrcRegisters[0].SrcRegister.Negate = !(inst->FullSrcRegisters[0].SrcRegister.Negate); inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; inst->FullDstRegisters[0].DstRegister.Index = assembler->temp_count; inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); inst->FullSrcRegisters[2].SrcRegister.Index = assembler->temp_count; inst->FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; inst->FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst->FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst->FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; inst->FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; inst->FullSrcRegisters[1] = inst->FullSrcRegisters[3]; inst->FullSrcRegisters[0].SrcRegister.Negate = !(inst->FullSrcRegisters[0].SrcRegister.Negate); inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); break; case TGSI_OPCODE_POW: /* POW DST A, B -> LG2 TMP A; MUL TMP TMP, B; EX2 DST TMP */ inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX; inst->FullSrcRegisters[0].SrcRegister.SwizzleW = inst->FullSrcRegisters[0].SrcRegister.SwizzleX; inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; inst->FullDstRegisters[0].DstRegister.Index = assembler->temp_count; inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], TGSI_OPCODE_LG2, 1); inst->FullSrcRegisters[0].SrcRegister.Index = assembler->temp_count; inst->FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; inst->FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst->FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst->FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; inst->FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; inst->FullSrcRegisters[2] = r300_constant_zero; r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], TGSI_OPCODE_MUL, 3); inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], TGSI_OPCODE_EX2, 1); break; /* The texture instruction set. */ case TGSI_OPCODE_KIL: case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXP: r5xx_emit_tex(fs, assembler, &inst->FullSrcRegisters[0], &inst->FullDstRegisters[0], inst->Instruction.Opcode); break; /* This is the end. My only friend, the end. */ case TGSI_OPCODE_END: break; default: debug_printf("r300: fs: Bad opcode %d\n", inst->Instruction.Opcode); break; } /* Clamp, if saturation flags are set. */ if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) { fs->instructions[fs->instruction_count - 1].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; } }