diff options
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/r600/Makefile | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_context.c | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/radeon_nqssadce.c | 284 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/radeon_nqssadce.h | 96 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/radeon_program.c | 128 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/radeon_program.h | 99 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/radeon_program_alu.c | 658 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/radeon_program_alu.h | 53 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/radeon_program_pair.c | 1006 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/radeon_program_pair.h | 126 |
10 files changed, 3 insertions, 2459 deletions
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 2ad9d238df..667eb1f919 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -38,10 +38,6 @@ DRIVER_SOURCES = \ radeon_screen.c \ r600_context.c \ r600_cmdbuf.c \ - radeon_program.c \ - radeon_program_alu.c \ - radeon_program_pair.c \ - radeon_nqssadce.c \ r600_emit.c \ r700_assembler.c \ r700_fragprog.c \ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index a82dc7f36b..01440705c3 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -279,10 +279,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - if (screen->chip_family >= CHIP_FAMILY_RV515) { - ctx->Const.MaxTextureLevels = 13; - ctx->Const.MaxTextureRectSize = 4096; - } + ctx->Const.MaxTextureLevels = 13; + ctx->Const.MaxTextureRectSize = 4096; ctx->Const.MinPointSize = 0x0001 / 8.0; ctx->Const.MinPointSizeAA = 0x0001 / 8.0; @@ -377,7 +375,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, r600InitCmdBuf(r600); - (r600->chipobj.InitState)(r600->radeon.glCtx); + (r600->chipobj.InitState)(r600->radeon.glCtx); TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline; diff --git a/src/mesa/drivers/dri/r600/radeon_nqssadce.c b/src/mesa/drivers/dri/r600/radeon_nqssadce.c deleted file mode 100644 index a083c3d243..0000000000 --- a/src/mesa/drivers/dri/r600/radeon_nqssadce.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * "Not-quite SSA" and Dead-Code Elimination. - * - * @note This code uses SWIZZLE_NIL in a source register to indicate that - * the corresponding component is ignored by the corresponding instruction. - */ - -#include "radeon_nqssadce.h" - - -/** - * Return the @ref register_state for the given register (or 0 for untracked - * registers, i.e. constants). - */ -static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_OUTPUT: return &s->Outputs[index]; - default: return 0; - } -} - - -/** - * Left multiplication of a register with a swizzle - * - * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. - */ -static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) -{ - struct prog_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.NegateBase = 0; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; -} - - -static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, - struct prog_instruction *inst, GLint src, GLuint sourced) -{ - int i; - GLuint deswz_source = 0; - - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) { - GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); - deswz_source |= 1 << swz; - } else { - inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - } - - if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { - struct prog_dst_register dstreg = inst->DstReg; - dstreg.File = PROGRAM_TEMPORARY; - dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); - dstreg.WriteMask = sourced; - - s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - - inst = s->Program->Instructions + s->IP; - inst->SrcReg[src].File = PROGRAM_TEMPORARY; - inst->SrcReg[src].Index = dstreg.Index; - inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].NegateBase = 0; - inst->SrcReg[src].Abs = 0; - inst->SrcReg[src].NegateAbs = 0; - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) - inst->SrcReg[src].Swizzle |= i << (3*i); - else - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - deswz_source = sourced; - } - - struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); - if (regstate) - regstate->Sourced |= deswz_source & 0xf; - - return inst; -} - - -static void rewrite_depth_out(struct prog_instruction *inst) -{ - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; - } else { - inst->DstReg.WriteMask = 0; - return; - } - - switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; - } -} - -static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) -{ - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int i; - for(i = 0; i < nsrc; ++i) - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) - inst->SrcReg[i].Index = newindex; -} - -static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) -{ - GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); - int ip; - for(ip = 0; ip < s->IP; ++ip) { - struct prog_instruction* inst = s->Program->Instructions + ip; - if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) - inst->DstReg.Index = newindex; - unalias_srcregs(inst, oldindex, newindex); - } - unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); -} - - -/** - * Handle one instruction. - */ -static void process_instruction(struct nqssadce_state* s) -{ - struct prog_instruction *inst = s->Program->Instructions + s->IP; - - if (inst->Opcode == OPCODE_END) - return; - - if (inst->Opcode != OPCODE_KIL) { - if (s->Descr->RewriteDepthOut) { - if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH) - rewrite_depth_out(inst); - } - - struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); - if (!regstate) { - _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", - inst->DstReg.File, inst->DstReg.Index); - return; - } - - inst->DstReg.WriteMask &= regstate->Sourced; - regstate->Sourced &= ~inst->DstReg.WriteMask; - - if (inst->DstReg.WriteMask == 0) { - _mesa_delete_instructions(s->Program, s->IP, 1); - return; - } - - if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) - unalias_temporary(s, inst->DstReg.Index); - } - - /* Attention: Due to swizzle emulation code, the following - * might change the instruction stream under us, so we have - * to be careful with the inst pointer. */ - switch (inst->Opcode) { - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MOV: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - inst = track_used_srcreg(s, inst, 0, 0x1); - break; - case OPCODE_DP3: - inst = track_used_srcreg(s, inst, 0, 0x7); - inst = track_used_srcreg(s, inst, 1, 0x7); - break; - case OPCODE_DP4: - inst = track_used_srcreg(s, inst, 0, 0xf); - inst = track_used_srcreg(s, inst, 1, 0xf); - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - inst = track_used_srcreg(s, inst, 0, 0xf); - break; - default: - _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); - return; - } -} - - -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) -{ - struct nqssadce_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = p; - s.Descr = descr; - s.Descr->Init(&s); - s.IP = p->NumInstructions; - - while(s.IP > 0) { - s.IP--; - process_instruction(&s); - } -} diff --git a/src/mesa/drivers/dri/r600/radeon_nqssadce.h b/src/mesa/drivers/dri/r600/radeon_nqssadce.h deleted file mode 100644 index a4f94abcb6..0000000000 --- a/src/mesa/drivers/dri/r600/radeon_nqssadce.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_NQSSADCE_H_ -#define __RADEON_PROGRAM_NQSSADCE_H_ - -#include "radeon_program.h" - - -struct register_state { - /** - * Bitmask indicating which components of the register are sourced - * by later instructions. - */ - GLuint Sourced : 4; -}; - -/** - * Maintain state such as which registers are used, which registers are - * read from, etc. - */ -struct nqssadce_state { - GLcontext *Ctx; - struct gl_program *Program; - struct radeon_nqssadce_descr *Descr; - - /** - * All instructions after this instruction pointer have been dealt with. - */ - int IP; - - /** - * Which registers are read by subsequent instructions? - */ - struct register_state Temps[MAX_PROGRAM_TEMPS]; - struct register_state Outputs[VERT_RESULT_MAX]; -}; - - -/** - * This structure contains a description of the hardware in-so-far as - * it is required for the NqSSA-DCE pass. - */ -struct radeon_nqssadce_descr { - /** - * Fill in which outputs - */ - void (*Init)(struct nqssadce_state *); - - /** - * Check whether the given swizzle, absolute and negate combination - * can be implemented natively by the hardware for this opcode. - */ - GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); - - /** - * Emit (at the current IP) the instruction MOV dst, src; - * The transformation will work recursively on the emitted instruction(s). - */ - void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - - /** - * Rewrite instructions that write to DEPR.z to write to DEPR.w - * instead (rewriting is done *before* the WriteMask test). - */ - GLboolean RewriteDepthOut; - void *Data; -}; - -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); - -#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r600/radeon_program.c b/src/mesa/drivers/dri/r600/radeon_program.c deleted file mode 100644 index da5e7aefce..0000000000 --- a/src/mesa/drivers/dri/r600/radeon_program.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program.h" - -#include "shader/prog_print.h" - - -/** - * Transform the given clause in the following way: - * 1. Replace it with an empty clause - * 2. For every instruction in the original clause, try the given - * transformations in order. - * 3. If one of the transformations returns GL_TRUE, assume that it - * has emitted the appropriate instruction(s) into the new clause; - * otherwise, copy the instruction verbatim. - * - * \note The transformation is currently not recursive; in other words, - * instructions emitted by transformations are not transformed. - * - * \note The transform is called 'local' because it can only look at - * one instruction at a time. - */ -void radeonLocalTransform( - GLcontext *Ctx, - struct gl_program *program, - int num_transformations, - struct radeon_program_transformation* transformations) -{ - struct radeon_transform_context ctx; - int ip; - - ctx.Ctx = Ctx; - ctx.Program = program; - ctx.OldInstructions = program->Instructions; - ctx.OldNumInstructions = program->NumInstructions; - - program->Instructions = 0; - program->NumInstructions = 0; - - for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { - struct prog_instruction *instr = ctx.OldInstructions + ip; - int i; - - for(i = 0; i < num_transformations; ++i) { - struct radeon_program_transformation* t = transformations + i; - - if (t->function(&ctx, instr, t->userData)) - break; - } - - if (i >= num_transformations) { - struct prog_instruction* dest = radeonAppendInstructions(program, 1); - _mesa_copy_instructions(dest, instr, 1); - } - } - - _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); -} - - -static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) -{ - GLuint i; - for (i = 0; i < count; i++) { - const struct prog_instruction *inst = insts + i; - const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); - GLuint k; - - for (k = 0; k < n; k++) { - if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) - used[inst->SrcReg[k].Index] = GL_TRUE; - } - } -} - -GLint radeonFindFreeTemporary(struct radeon_transform_context *t) -{ - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i; - - _mesa_memset(used, 0, sizeof(used)); - scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); - scan_instructions(used, t->OldInstructions, t->OldNumInstructions); - - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { - if (!used[i]) - return i; - } - - return -1; -} - - -/** - * Append the given number of instructions to the program and return a - * pointer to the first new instruction. - */ -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) -{ - int oldnum = program->NumInstructions; - _mesa_insert_instructions(program, oldnum, count); - return program->Instructions + oldnum; -} diff --git a/src/mesa/drivers/dri/r600/radeon_program.h b/src/mesa/drivers/dri/r600/radeon_program.h deleted file mode 100644 index b411f69bc8..0000000000 --- a/src/mesa/drivers/dri/r600/radeon_program.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_H_ -#define __RADEON_PROGRAM_H_ - -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" -#include "shader/prog_instruction.h" - - -enum { - CLAUSE_MIXED = 0, - CLAUSE_ALU, - CLAUSE_TEX -}; - -enum { - PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ -}; - -enum { - OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ -}; - -#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) -#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) - -/** - * Transformation context that is passed to local transformations. - * - * Care must be taken with some operations during transformation, - * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary - */ -struct radeon_transform_context { - GLcontext *Ctx; - struct gl_program *Program; - struct prog_instruction *OldInstructions; - GLuint OldNumInstructions; -}; - -/** - * A transformation that can be passed to \ref radeonLocalTransform. - * - * The function will be called once for each instruction. - * It has to either emit the appropriate transformed code for the instruction - * and return GL_TRUE, or return GL_FALSE if it doesn't understand the - * instruction. - * - * The function gets passed the userData as last parameter. - */ -struct radeon_program_transformation { - GLboolean (*function)( - struct radeon_transform_context*, - struct prog_instruction*, - void*); - void *userData; -}; - -void radeonLocalTransform( - GLcontext* ctx, - struct gl_program *program, - int num_transformations, - struct radeon_program_transformation* transformations); - -/** - * Find a usable free temporary register during program transformation - */ -GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); - -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); - -#endif diff --git a/src/mesa/drivers/dri/r600/radeon_program_alu.c b/src/mesa/drivers/dri/r600/radeon_program_alu.c deleted file mode 100644 index 1ef71e74dc..0000000000 --- a/src/mesa/drivers/dri/r600/radeon_program_alu.c +++ /dev/null @@ -1,658 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * Shareable transformations that transform "special" ALU instructions - * into ALU instructions that are supported by hardware. - * - */ - -#include "radeon_program_alu.h" - -#include "shader/prog_parameter.h" - - -static struct prog_instruction *emit1(struct gl_program* p, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg) -{ - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg; - return fpi; -} - -static struct prog_instruction *emit2(struct gl_program* p, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) -{ - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; - return fpi; -} - -static struct prog_instruction *emit3(struct gl_program* p, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, - struct prog_src_register SrcReg2) -{ - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; - fpi->SrcReg[2] = SrcReg2; - return fpi; -} - -static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz) -{ - SrcReg->Swizzle &= ~(7 << (3*coordinate)); - SrcReg->Swizzle |= swz << (3*coordinate); -} - -static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate) -{ - SrcReg->NegateBase &= ~(1 << coordinate); - SrcReg->NegateBase |= (negate << coordinate); -} - -static struct prog_dst_register dstreg(int file, int index) -{ - struct prog_dst_register dst; - dst.File = file; - dst.Index = index; - dst.WriteMask = WRITEMASK_XYZW; - dst.CondMask = COND_TR; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; - return dst; -} - -static struct prog_dst_register dstregtmpmask(int index, int mask) -{ - struct prog_dst_register dst; - dst.File = PROGRAM_TEMPORARY; - dst.Index = index; - dst.WriteMask = mask; - dst.CondMask = COND_TR; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; - return dst; -} - -static const struct prog_src_register builtin_zero = { - .File = PROGRAM_BUILTIN, - .Index = 0, - .Swizzle = SWIZZLE_0000 -}; -static const struct prog_src_register builtin_one = { - .File = PROGRAM_BUILTIN, - .Index = 0, - .Swizzle = SWIZZLE_1111 -}; -static const struct prog_src_register srcreg_undefined = { - .File = PROGRAM_UNDEFINED, - .Index = 0, - .Swizzle = SWIZZLE_NOOP -}; - -static struct prog_src_register srcreg(int file, int index) -{ - struct prog_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - return src; -} - -static struct prog_src_register srcregswz(int file, int index, int swz) -{ - struct prog_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - src.Swizzle = swz; - return src; -} - -static struct prog_src_register absolute(struct prog_src_register reg) -{ - struct prog_src_register newreg = reg; - newreg.Abs = 1; - newreg.NegateBase = 0; - newreg.NegateAbs = 0; - return newreg; -} - -static struct prog_src_register negate(struct prog_src_register reg) -{ - struct prog_src_register newreg = reg; - newreg.NegateAbs = !newreg.NegateAbs; - return newreg; -} - -static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) -{ - struct prog_src_register swizzled = reg; - swizzled.Swizzle = MAKE_SWIZZLE4( - x >= 4 ? x : GET_SWZ(reg.Swizzle, x), - y >= 4 ? y : GET_SWZ(reg.Swizzle, y), - z >= 4 ? z : GET_SWZ(reg.Swizzle, z), - w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); - return swizzled; -} - -static struct prog_src_register scalar(struct prog_src_register reg) -{ - return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); -} - -static void transform_ABS(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - struct prog_src_register src = inst->SrcReg[0]; - src.Abs = 1; - src.NegateBase = 0; - src.NegateAbs = 0; - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); -} - -static void transform_DPH(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - struct prog_src_register src0 = inst->SrcReg[0]; - if (src0.NegateAbs) { - if (src0.Abs) { - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_MOV, 0, dstreg(PROGRAM_TEMPORARY, tempreg), src0); - src0 = srcreg(src0.File, src0.Index); - } else { - src0.NegateAbs = 0; - src0.NegateBase ^= NEGATE_XYZW; - } - } - set_swizzle(&src0, 3, SWIZZLE_ONE); - set_negate_base(&src0, 3, 0); - emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); -} - -/** - * [1, src0.y*src1.y, src0.z, src1.w] - * So basically MUL with lotsa swizzling. - */ -static void transform_DST(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), - swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); -} - -static void transform_FLR(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); -} - -/** - * Definition of LIT (from ARB_fragment_program): - * - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * The longest path of computation is the one leading to result.z, - * consisting of 5 operations. This implementation of LIT takes - * 5 slots, if the subsequent optimization passes are clever enough - * to pair instructions correctly. - */ -static void transform_LIT(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - static const GLfloat LitConst[4] = { -127.999999 }; - - GLuint constant; - GLuint constant_swizzle; - GLuint temp; - int needTemporary = 0; - struct prog_src_register srctemp; - - constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); - - if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } - - if (needTemporary) { - temp = radeonFindFreeTemporary(t); - } else { - temp = inst->DstReg.Index; - } - srctemp = srcreg(PROGRAM_TEMPORARY, temp); - - // tmp.x = max(0.0, Src.x); - // tmp.y = max(0.0, Src.y); - // tmp.w = clamp(Src.z, -128+eps, 128-eps); - emit2(t->Program, OPCODE_MAX, 0, - dstregtmpmask(temp, WRITEMASK_XYW), - inst->SrcReg[0], - swizzle(srcreg(PROGRAM_CONSTANT, constant), - SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); - emit2(t->Program, OPCODE_MIN, 0, - dstregtmpmask(temp, WRITEMASK_Z), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); - - // tmp.w = Pow(tmp.y, tmp.w) - emit1(t->Program, OPCODE_LG2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit2(t->Program, OPCODE_MUL, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); - emit1(t->Program, OPCODE_EX2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - - // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, - dstregtmpmask(temp, WRITEMASK_Z), - negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - builtin_zero); - - // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, - dstregtmpmask(temp, WRITEMASK_XYW), - swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); - - if (needTemporary) - emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); -} - -static void transform_LRP(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_ADD, 0, - dstreg(PROGRAM_TEMPORARY, tempreg), - inst->SrcReg[1], negate(inst->SrcReg[2])); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, - inst->DstReg, - inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); -} - -static void transform_POW(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); - struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); - tempdst.WriteMask = WRITEMASK_W; - tempsrc.Swizzle = SWIZZLE_WWWW; - - emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); - emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); - emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); -} - -static void transform_RSQ(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); -} - -static void transform_SGE(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); -} - -static void transform_SLT(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); -} - -static void transform_SUB(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); -} - -static void transform_SWZ(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); -} - -static void transform_XPD(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - negate(srcreg(PROGRAM_TEMPORARY, tempreg))); -} - - -/** - * Can be used as a transformation for @ref radeonClauseLocalTransform, - * no userData necessary. - * - * Eliminates the following ALU instructions: - * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD - * using: - * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP - * - * Transforms RSQ to Radeon's native RSQ by explicitly setting - * absolute value. - * - * @note should be applicable to R300 and R500 fragment programs. - */ -GLboolean radeonTransformALU(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - switch(inst->Opcode) { - case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; - case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; - case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; - case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; - default: - return GL_FALSE; - } -} - - -static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) -{ - static const GLfloat SinCosConsts[2][4] = { - { - 1.273239545, // 4/PI - -0.405284735, // -4/(PI*PI) - 3.141592654, // PI - 0.2225 // weight - }, - { - 0.75, - 0.5, - 0.159154943, // 1/(2*PI) - 6.283185307 // 2*PI - } - }; - int i; - - for(i = 0; i < 2; ++i) { - GLuint swz; - constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); - ASSERT(swz == SWIZZLE_NOOP); - } -} - -/** - * Approximate sin(x), where x is clamped to (-pi/2, pi/2). - * - * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } - * MAD tmp.x, tmp.y, |src|, tmp.x - * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x - * MAD dest, tmp.y, weight, tmp.x - */ -static void sin_approx(struct radeon_transform_context* t, - struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) -{ - GLuint tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcreg(PROGRAM_CONSTANT, constants[0])); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); - emit3(t->Program, OPCODE_MAD, 0, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); -} - -/** - * Translate the trigonometric functions COS, SIN, and SCS - * using only the basic instructions - * MOV, ADD, MUL, MAD, FRC - */ -GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) - return GL_FALSE; - - GLuint constants[2]; - GLuint tempreg = radeonFindFreeTemporary(t); - - sincos_constants(t, constants); - - if (inst->Opcode == OPCODE_COS) { - // MAD tmp.x, src, 1/(2*PI), 0.75 - // FRC tmp.x, tmp.x - // MAD tmp.z, tmp.x, 2*PI, -PI - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(t, inst->DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - constants); - } else if (inst->Opcode == OPCODE_SIN) { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(t, inst->DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - constants); - } else { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - struct prog_dst_register dst = inst->DstReg; - - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; - sin_approx(t, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - constants); - - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; - sin_approx(t, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - constants); - } - - return GL_TRUE; -} - - -/** - * Transform the trigonometric functions COS, SIN, and SCS - * to include pre-scaling by 1/(2*PI) and taking the fractional - * part, so that the input to COS and SIN is always in the range [0,1). - * SCS is replaced by one COS and one SIN instruction. - * - * @warning This transformation implicitly changes the semantics of SIN and COS! - */ -GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) - return GL_FALSE; - - static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; - GLuint temp; - GLuint constant; - GLuint constant_swizzle; - - temp = radeonFindFreeTemporary(t); - constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); - - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), - srcreg(PROGRAM_TEMPORARY, temp)); - - if (inst->Opcode == OPCODE_COS) { - emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SIN) { - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, - inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = inst->DstReg; - - if (inst->DstReg.WriteMask & WRITEMASK_X) { - moddst.WriteMask = WRITEMASK_X; - emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } - if (inst->DstReg.WriteMask & WRITEMASK_Y) { - moddst.WriteMask = WRITEMASK_Y; - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } - } - - return GL_TRUE; -} - -/** - * Rewrite DDX/DDY instructions to properly work with r5xx shaders. - * The r5xx MDH/MDV instruction provides per-quad partial derivatives. - * It takes the form A*B+C. A and C are set by setting src0. B should be -1. - * - * @warning This explicitly changes the form of DDX and DDY! - */ - -GLboolean radeonTransformDeriv(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) - return GL_FALSE; - - struct prog_src_register B = inst->SrcReg[1]; - - B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, - SWIZZLE_ONE, SWIZZLE_ONE); - B.NegateBase = NEGATE_XYZW; - - emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], B); - - return GL_TRUE; -} diff --git a/src/mesa/drivers/dri/r600/radeon_program_alu.h b/src/mesa/drivers/dri/r600/radeon_program_alu.h deleted file mode 100644 index b45958115c..0000000000 --- a/src/mesa/drivers/dri/r600/radeon_program_alu.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_ALU_H_ -#define __RADEON_PROGRAM_ALU_H_ - -#include "radeon_program.h" - -GLboolean radeonTransformALU( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -GLboolean radeonTransformTrigSimple( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -GLboolean radeonTransformTrigScale( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -GLboolean radeonTransformDeriv( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r600/radeon_program_pair.c b/src/mesa/drivers/dri/r600/radeon_program_pair.c deleted file mode 100644 index 49aa90dd94..0000000000 --- a/src/mesa/drivers/dri/r600/radeon_program_pair.c +++ /dev/null @@ -1,1006 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * Perform temporary register allocation and attempt to pair off instructions - * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction - * vs. ALU instruction scheduling. - */ - -#include "radeon_program_pair.h" - -#include "radeon_common.h" - -#include "shader/prog_print.h" - -#define error(fmt, args...) do { \ - _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - s->Error = GL_TRUE; \ -} while(0) - -struct pair_state_instruction { - GLuint IsTex:1; /**< Is a texture instruction */ - GLuint NeedRGB:1; /**< Needs the RGB ALU */ - GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ - GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ - - /** - * Number of (read and write) dependencies that must be resolved before - * this instruction can be scheduled. - */ - GLuint NumDependencies:5; - - /** - * Next instruction in the linked list of ready instructions. - */ - struct pair_state_instruction *NextReady; - - /** - * Values that this instruction writes - */ - struct reg_value *Values[4]; -}; - - -/** - * Used to keep track of which instructions read a value. - */ -struct reg_value_reader { - GLuint IP; /**< IP of the instruction that performs this access */ - struct reg_value_reader *Next; -}; - -/** - * Used to keep track which values are stored in each component of a - * PROGRAM_TEMPORARY. - */ -struct reg_value { - GLuint IP; /**< IP of the instruction that writes this value */ - struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ - - /** - * Unordered linked list of instructions that read from this value. - */ - struct reg_value_reader *Readers; - - /** - * Number of readers of this value. This is calculated during @ref scan_instructions - * and continually decremented during code emission. - * When this count reaches zero, the instruction that writes the @ref Next value - * can be scheduled. - */ - GLuint NumReaders; -}; - -/** - * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register - * to the proper hardware temporary. - */ -struct pair_register_translation { - GLuint Allocated:1; - GLuint HwIndex:8; - GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ - - /** - * Notes the value that is currently contained in each component - * (only used for PROGRAM_TEMPORARY registers). - */ - struct reg_value *Value[4]; -}; - -struct pair_state { - GLcontext *Ctx; - struct gl_program *Program; - const struct radeon_pair_handler *Handler; - GLboolean Error; - GLboolean Debug; - GLboolean Verbose; - void *UserData; - - /** - * Translate Mesa registers to hardware registers - */ - struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; - struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; - - /** - * Derived information about program instructions. - */ - struct pair_state_instruction *Instructions; - - struct { - GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ - } HwTemps[128]; - - /** - * Linked list of instructions that can be scheduled right now, - * based on which ALU/TEX resources they require. - */ - struct pair_state_instruction *ReadyFullALU; - struct pair_state_instruction *ReadyRGB; - struct pair_state_instruction *ReadyAlpha; - struct pair_state_instruction *ReadyTEX; - - /** - * Pool of @ref reg_value structures for fast allocation. - */ - struct reg_value *ValuePool; - GLuint ValuePoolUsed; - struct reg_value_reader *ReaderPool; - GLuint ReaderPoolUsed; -}; - - -static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_INPUT: return &s->Inputs[index]; - default: return 0; - } -} - -static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) -{ - struct pair_register_translation *t = get_register(s, file, index); - ASSERT(!s->HwTemps[hwindex].RefCount); - ASSERT(!t->Allocated); - s->HwTemps[hwindex].RefCount = t->RefCount; - t->Allocated = 1; - t->HwIndex = hwindex; -} - -static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) -{ - GLuint hwindex; - - struct pair_register_translation *t = get_register(s, file, index); - if (!t) { - _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); - return 0; - } - - if (t->Allocated) - return t->HwIndex; - - for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) - if (!s->HwTemps[hwindex].RefCount) - break; - - if (hwindex >= s->Handler->MaxHwTemps) { - error("Ran out of hardware temporaries"); - return 0; - } - - alloc_hw_reg(s, file, index, hwindex); - return hwindex; -} - - -static void deref_hw_reg(struct pair_state *s, GLuint hwindex) -{ - if (!s->HwTemps[hwindex].RefCount) { - error("Hwindex %i refcount error", hwindex); - return; - } - - s->HwTemps[hwindex].RefCount--; -} - -static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) -{ - pairinst->NextReady = *list; - *list = pairinst; -} - -/** - * The instruction at the given IP has become ready. Link it into the ready - * instructions. - */ -static void instruction_ready(struct pair_state *s, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - - if (s->Verbose) - _mesa_printf("instruction_ready(%i)\n", ip); - - if (pairinst->IsTex) - add_pairinst_to_list(&s->ReadyTEX, pairinst); - else if (!pairinst->NeedAlpha) - add_pairinst_to_list(&s->ReadyRGB, pairinst); - else if (!pairinst->NeedRGB) - add_pairinst_to_list(&s->ReadyAlpha, pairinst); - else - add_pairinst_to_list(&s->ReadyFullALU, pairinst); -} - - -/** - * Finally rewrite ADD, MOV, MUL as the appropriate native instruction - * and reverse the order of arguments for CMP. - */ -static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) -{ - struct prog_src_register tmp; - - switch(inst->Opcode) { - case OPCODE_ADD: - inst->SrcReg[2] = inst->SrcReg[1]; - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].NegateBase = 0; - inst->SrcReg[1].NegateAbs = 0; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_CMP: - tmp = inst->SrcReg[2]; - inst->SrcReg[2] = inst->SrcReg[0]; - inst->SrcReg[0] = tmp; - break; - case OPCODE_MOV: - /* AMD say we should use CMP. - * However, when we transform - * KIL -r0; - * into - * CMP tmp, -r0, -r0, 0; - * KIL tmp; - * we get incorrect behaviour on R500 when r0 == 0.0. - * It appears that the R500 KIL hardware treats -0.0 as less - * than zero. - */ - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_MUL: - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - default: - /* nothing to do */ - break; - } -} - - -/** - * Classify an instruction according to which ALUs etc. it needs - */ -static void classify_instruction(struct pair_state *s, - struct prog_instruction *inst, struct pair_state_instruction *pairinst) -{ - pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; - pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; - - switch(inst->Opcode) { - case OPCODE_ADD: - case OPCODE_CMP: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MAD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MOV: - case OPCODE_MUL: - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - pairinst->IsTranscendent = 1; - pairinst->NeedAlpha = 1; - break; - case OPCODE_DP4: - pairinst->NeedAlpha = 1; - /* fall through */ - case OPCODE_DP3: - pairinst->NeedRGB = 1; - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - case OPCODE_END: - pairinst->IsTex = 1; - break; - default: - error("Unknown opcode %d\n", inst->Opcode); - break; - } -} - - -/** - * Count which (input, temporary) register is read and written how often, - * and scan the instruction stream to find dependencies. - */ -static void scan_instructions(struct pair_state *s) -{ - struct prog_instruction *inst; - struct pair_state_instruction *pairinst; - GLuint ip; - - for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; - inst->Opcode != OPCODE_END; - ++inst, ++pairinst, ++ip) { - final_rewrite(s, inst); - classify_instruction(s, inst, pairinst); - - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int j; - for(j = 0; j < nsrc; j++) { - struct pair_register_translation *t = - get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); - if (!t) - continue; - - t->RefCount++; - - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - int i; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); - if (swz >= 4) - continue; /* constant or NIL swizzle */ - if (!t->Value[swz]) - continue; /* this is an undefined read */ - - /* Do not add a dependency if this instruction - * also rewrites the value. The code below adds - * a dependency for the DstReg, which is a superset - * of the SrcReg dependency. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[j].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) - continue; - - struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; - pairinst->NumDependencies++; - t->Value[swz]->NumReaders++; - r->IP = ip; - r->Next = t->Value[swz]->Readers; - t->Value[swz]->Readers = r; - } - } - } - - int ndst = _mesa_num_inst_dst_regs(inst->Opcode); - if (ndst) { - struct pair_register_translation *t = - get_register(s, inst->DstReg.File, inst->DstReg.Index); - if (t) { - t->RefCount++; - - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - int j; - for(j = 0; j < 4; ++j) { - if (!GET_BIT(inst->DstReg.WriteMask, j)) - continue; - - struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; - v->IP = ip; - if (t->Value[j]) { - pairinst->NumDependencies++; - t->Value[j]->Next = v; - } - t->Value[j] = v; - pairinst->Values[j] = v; - } - } - } - } - - if (s->Verbose) - _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); - - if (!pairinst->NumDependencies) - instruction_ready(s, ip); - } - - /* Clear the PROGRAM_TEMPORARY state */ - int i, j; - for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { - for(j = 0; j < 4; ++j) - s->Temps[i].Value[j] = 0; - } -} - - -/** - * Reserve hardware temporary registers for the program inputs. - * - * @note This allocation is performed explicitly, because the order of inputs - * is determined by the RS hardware. - */ -static void allocate_input_registers(struct pair_state *s) -{ - GLuint InputsRead = s->Program->InputsRead; - int i; - GLuint hwindex = 0; - - /* Primary colour */ - if (InputsRead & FRAG_BIT_COL0) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); - InputsRead &= ~FRAG_BIT_COL1; - - /* Texcoords */ - for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* Fogcoords treated as a texcoord */ - if (InputsRead & FRAG_BIT_FOGC) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); - InputsRead &= ~FRAG_BIT_FOGC; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); - InputsRead &= ~FRAG_BIT_WPOS; - - /* Anything else */ - if (InputsRead) - error("Don't know how to handle inputs 0x%x\n", InputsRead); -} - - -static void decrement_dependencies(struct pair_state *s, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - ASSERT(pairinst->NumDependencies > 0); - if (!--pairinst->NumDependencies) - instruction_ready(s, ip); -} - -/** - * Update the dependency tracking state based on what the instruction - * at the given IP does. - */ -static void commit_instruction(struct pair_state *s, int ip) -{ - struct prog_instruction *inst = s->Program->Instructions + ip; - struct pair_state_instruction *pairinst = s->Instructions + ip; - - if (s->Verbose) - _mesa_printf("commit_instruction(%i)\n", ip); - - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; - deref_hw_reg(s, t->HwIndex); - - int i; - for(i = 0; i < 4; ++i) { - if (!GET_BIT(inst->DstReg.WriteMask, i)) - continue; - - t->Value[i] = pairinst->Values[i]; - if (t->Value[i]->NumReaders) { - struct reg_value_reader *r; - for(r = pairinst->Values[i]->Readers; r; r = r->Next) - decrement_dependencies(s, r->IP); - } else if (t->Value[i]->Next) { - /* This happens when the only reader writes - * the register at the same time */ - decrement_dependencies(s, t->Value[i]->Next->IP); - } - } - } - - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int i; - for(i = 0; i < nsrc; i++) { - struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); - if (!t) - continue; - - deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); - - if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) - continue; - - int j; - for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz >= 4) - continue; - if (!t->Value[swz]) - continue; - - /* Do not free a dependency if this instruction - * also rewrites the value. See scan_instructions. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[i].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) - continue; - - if (!--t->Value[swz]->NumReaders) { - if (t->Value[swz]->Next) - decrement_dependencies(s, t->Value[swz]->Next->IP); - } - } - } -} - - -/** - * Emit all ready texture instructions in a single block. - * - * Emit as a single block to (hopefully) sample many textures in parallel, - * and to avoid hardware indirections on R300. - * - * In R500, we don't really know when the result of a texture instruction - * arrives. So allocate all destinations first, to make sure they do not - * arrive early and overwrite a texture coordinate we're going to use later - * in the block. - */ -static void emit_all_tex(struct pair_state *s) -{ - struct pair_state_instruction *readytex; - struct pair_state_instruction *pairinst; - - ASSERT(s->ReadyTEX); - - // Don't let the ready list change under us! - readytex = s->ReadyTEX; - s->ReadyTEX = 0; - - // Allocate destination hardware registers in one block to avoid conflicts. - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; - if (inst->Opcode != OPCODE_KIL) - get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - } - - if (s->Debug) - _mesa_printf(" BEGIN_TEX\n"); - - if (s->Handler->BeginTexBlock) - s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); - - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; - commit_instruction(s, ip); - - if (inst->Opcode != OPCODE_KIL) - inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); - - if (s->Debug) { - _mesa_printf(" "); - _mesa_print_instruction(inst); - } - s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); - } - - if (s->Debug) - _mesa_printf(" END_TEX\n"); -} - - -static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, - struct prog_src_register src, GLboolean rgb, GLboolean alpha) -{ - int candidate = -1; - int candidate_quality = -1; - int i; - - if (!rgb && !alpha) - return 0; - - GLuint constant; - GLuint index; - - if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { - constant = 0; - index = get_hw_reg(s, src.File, src.Index); - } else { - constant = 1; - s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); - } - - for(i = 0; i < 3; ++i) { - int q = 0; - if (rgb) { - if (pair->RGB.Src[i].Used) { - if (pair->RGB.Src[i].Constant != constant || - pair->RGB.Src[i].Index != index) - continue; - q++; - } - } - if (alpha) { - if (pair->Alpha.Src[i].Used) { - if (pair->Alpha.Src[i].Constant != constant || - pair->Alpha.Src[i].Index != index) - continue; - q++; - } - } - if (q > candidate_quality) { - candidate_quality = q; - candidate = i; - } - } - - if (candidate >= 0) { - if (rgb) { - pair->RGB.Src[candidate].Used = 1; - pair->RGB.Src[candidate].Constant = constant; - pair->RGB.Src[candidate].Index = index; - } - if (alpha) { - pair->Alpha.Src[candidate].Used = 1; - pair->Alpha.Src[candidate].Constant = constant; - pair->Alpha.Src[candidate].Index = index; - } - } - - return candidate; -} - -/** - * Fill the given ALU instruction's opcodes and source operands into the given pair, - * if possible. - */ -static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; - - ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); - ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); - - if (pairinst->NeedRGB) { - if (pairinst->IsTranscendent) - pair->RGB.Opcode = OPCODE_REPL_ALPHA; - else - pair->RGB.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->RGB.Saturate = 1; - } - if (pairinst->NeedAlpha) { - pair->Alpha.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->Alpha.Saturate = 1; - } - - int nargs = _mesa_num_inst_src_regs(inst->Opcode); - int i; - - /* Special case for DDX/DDY (MDH/MDV). */ - if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { - if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) - return GL_FALSE; - else - nargs++; - } - - for(i = 0; i < nargs; ++i) { - int source; - if (pairinst->NeedRGB && !pairinst->IsTranscendent) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - GLuint negatebase = 0; - int j; - for(j = 0; j < 3; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j)) - negatebase = 1; - } - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->RGB.Arg[i].Source = source; - pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; - pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; - } - if (pairinst->NeedAlpha) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3); - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->Alpha.Arg[i].Source = source; - pair->Alpha.Arg[i].Swizzle = swz; - pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; - } - } - - return GL_TRUE; -} - - -/** - * Fill in the destination register information. - * - * This is split from filling in source registers because we want - * to avoid allocating hardware temporaries for destinations until - * we are absolutely certain that we're going to emit a certain - * instruction pairing. - */ -static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; - - if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == FRAG_RESULT_COLOR) { - pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { - pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } else { - GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - if (pairinst->NeedRGB) { - pair->RGB.DestIndex = hwindex; - pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - } - if (pairinst->NeedAlpha) { - pair->Alpha.DestIndex = hwindex; - pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } -} - - -/** - * Find a good ALU instruction or pair of ALU instruction and emit it. - * - * Prefer emitting full ALU instructions, so that when we reach a point - * where no full ALU instruction can be emitted, we have more candidates - * for RGB/Alpha pairing. - */ -static void emit_alu(struct pair_state *s) -{ - struct radeon_pair_instruction pair; - - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - int ip; - if (s->ReadyFullALU) { - ip = s->ReadyFullALU - s->Instructions; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - ip = s->ReadyRGB - s->Instructions; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - ip = s->ReadyAlpha - s->Instructions; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); - } else { - struct pair_state_instruction **prgb; - struct pair_state_instruction **palpha; - - /* Some pairings might fail because they require too - * many source slots; try all possible pairings if necessary */ - for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { - for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - int rgbip = *prgb - s->Instructions; - int alphaip = *palpha - s->Instructions; - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, rgbip); - if (!fill_instruction_into_pair(s, &pair, alphaip)) - continue; - *prgb = (*prgb)->NextReady; - *palpha = (*palpha)->NextReady; - fill_dest_into_pair(s, &pair, rgbip); - fill_dest_into_pair(s, &pair, alphaip); - commit_instruction(s, rgbip); - commit_instruction(s, alphaip); - goto success; - } - } - - /* No success in pairing; just take the first RGB instruction */ - int ip = s->ReadyRGB - s->Instructions; - s->ReadyRGB = s->ReadyRGB->NextReady; - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); - success: ; - } - - if (s->Debug) - radeonPrintPairInstruction(&pair); - - s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); -} - - -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, - const struct radeon_pair_handler* handler, void *userdata) -{ - struct pair_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = program; - s.Handler = handler; - s.UserData = userdata; - s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; - s.Verbose = GL_FALSE && s.Debug; - - s.Instructions = (struct pair_state_instruction*)_mesa_calloc( - sizeof(struct pair_state_instruction)*s.Program->NumInstructions); - s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); - s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( - sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); - - if (s.Debug) - _mesa_printf("Emit paired program\n"); - - scan_instructions(&s); - allocate_input_registers(&s); - - while(!s.Error && - (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { - if (s.ReadyTEX) - emit_all_tex(&s); - - while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) - emit_alu(&s); - } - - if (s.Debug) - _mesa_printf(" END\n"); - - _mesa_free(s.Instructions); - _mesa_free(s.ValuePool); - _mesa_free(s.ReaderPool); - - return !s.Error; -} - - -static void print_pair_src(int i, struct radeon_pair_instruction_source* src) -{ - _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); -} - -static const char* opcode_string(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return "SOP"; - else - return _mesa_opcode_string(opcode); -} - -static int num_pairinst_args(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return 0; - else - return _mesa_num_inst_src_regs(opcode); -} - -static char swizzle_char(GLuint swz) -{ - switch(swz) { - case SWIZZLE_X: return 'x'; - case SWIZZLE_Y: return 'y'; - case SWIZZLE_Z: return 'z'; - case SWIZZLE_W: return 'w'; - case SWIZZLE_ZERO: return '0'; - case SWIZZLE_ONE: return '1'; - case SWIZZLE_NIL: return '_'; - default: return '?'; - } -} - -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) -{ - int nargs; - int i; - - _mesa_printf(" RGB: "); - for(i = 0; i < 3; ++i) { - if (inst->RGB.Src[i].Used) - print_pair_src(i, inst->RGB.Src + i); - } - _mesa_printf("\n"); - _mesa_printf(" Alpha:"); - for(i = 0; i < 3; ++i) { - if (inst->Alpha.Src[i].Used) - print_pair_src(i, inst->Alpha.Src + i); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); - if (inst->RGB.WriteMask) - _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, - (inst->RGB.WriteMask & 1) ? "x" : "", - (inst->RGB.WriteMask & 2) ? "y" : "", - (inst->RGB.WriteMask & 4) ? "z" : ""); - if (inst->RGB.OutputWriteMask) - _mesa_printf(" COLOR.%s%s%s", - (inst->RGB.OutputWriteMask & 1) ? "x" : "", - (inst->RGB.OutputWriteMask & 2) ? "y" : "", - (inst->RGB.OutputWriteMask & 4) ? "z" : ""); - nargs = num_pairinst_args(inst->RGB.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; - const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), - abs); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); - if (inst->Alpha.WriteMask) - _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); - if (inst->Alpha.OutputWriteMask) - _mesa_printf(" COLOR.w"); - if (inst->Alpha.DepthWriteMask) - _mesa_printf(" DEPTH.w"); - nargs = num_pairinst_args(inst->Alpha.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; - const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, - swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); - } - _mesa_printf("\n"); -} diff --git a/src/mesa/drivers/dri/r600/radeon_program_pair.h b/src/mesa/drivers/dri/r600/radeon_program_pair.h deleted file mode 100644 index 4624a24629..0000000000 --- a/src/mesa/drivers/dri/r600/radeon_program_pair.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_PAIR_H_ -#define __RADEON_PROGRAM_PAIR_H_ - -#include "radeon_program.h" - - -/** - * Represents a paired instruction, as found in R300 and R500 - * fragment programs. - */ -struct radeon_pair_instruction_source { - GLuint Index:8; - GLuint Constant:1; - GLuint Used:1; -}; - -struct radeon_pair_instruction_rgb { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:3; - GLuint OutputWriteMask:3; - GLuint Saturate:1; - - struct radeon_pair_instruction_source Src[3]; - - struct { - GLuint Source:2; - GLuint Swizzle:9; - GLuint Abs:1; - GLuint Negate:1; - } Arg[3]; -}; - -struct radeon_pair_instruction_alpha { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:1; - GLuint OutputWriteMask:1; - GLuint DepthWriteMask:1; - GLuint Saturate:1; - - struct radeon_pair_instruction_source Src[3]; - - struct { - GLuint Source:2; - GLuint Swizzle:3; - GLuint Abs:1; - GLuint Negate:1; - } Arg[3]; -}; - -struct radeon_pair_instruction { - struct radeon_pair_instruction_rgb RGB; - struct radeon_pair_instruction_alpha Alpha; -}; - - -/** - * - */ -struct radeon_pair_handler { - /** - * Fill in the proper hardware index for the given constant register. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); - - /** - * Write a paired instruction to the hardware. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); - - /** - * Write a texture instruction to the hardware. - * Register indices have already been rewritten to the allocated - * hardware register numbers. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitTex)(void*, struct prog_instruction*); - - /** - * Called before a block of contiguous, independent texture - * instructions is emitted. - */ - GLboolean (*BeginTexBlock)(void*); - - GLuint MaxHwTemps; -}; - -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, - const struct radeon_pair_handler*, void *userdata); - -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); - -#endif /* __RADEON_PROGRAM_PAIR_H_ */ |