diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
26 files changed, 6338 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile new file mode 100644 index 0000000000..d973844192 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -0,0 +1,75 @@ +# src/mesa/drivers/dri/r300/compiler/Makefile + +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = r300compiler + +C_SOURCES = \ + radeon_code.c \ + radeon_compiler.c \ + radeon_nqssadce.c \ + radeon_program.c \ + radeon_program_alu.c \ + radeon_program_pair.c \ + r3xx_fragprog.c \ + r300_fragprog.c \ + r300_fragprog_swizzle.c \ + r300_fragprog_emit.c \ + r500_fragprog.c \ + r500_fragprog_emit.c \ + r3xx_vertprog.c \ + r3xx_vertprog_dump.c \ + \ + memory_pool.c + + +### Basic defines ### + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + +INCLUDES = \ + -I. \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + + +##### TARGETS ##### + +default: depend lib$(LIBNAME).a + +lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current + $(MKLIB) -o $(LIBNAME) -static $(OBJECTS) + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + +# Remove .o and backup files +clean: + rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak + +# Dummy target +install: + @echo -n "" + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c new file mode 100644 index 0000000000..37aa2b6579 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c @@ -0,0 +1,95 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "memory_pool.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +#define POOL_LARGE_ALLOC 4096 +#define POOL_ALIGN 4 + + +struct memory_block { + struct memory_block * next; +}; + +void memory_pool_init(struct memory_pool * pool) +{ + memset(pool, 0, sizeof(struct memory_pool)); +} + + +void memory_pool_destroy(struct memory_pool * pool) +{ + while(pool->blocks) { + struct memory_block * block = pool->blocks; + pool->blocks = block->next; + free(block); + } +} + +static void refill_pool(struct memory_pool * pool) +{ + unsigned int blocksize = pool->total_allocated; + struct memory_block * newblock; + + if (!blocksize) + blocksize = 2*POOL_LARGE_ALLOC; + + newblock = (struct memory_block*)malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; + + pool->head = (unsigned char*)(newblock + 1); + pool->end = ((unsigned char*)newblock) + blocksize; + pool->total_allocated += blocksize; +} + + +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +{ + if (bytes < POOL_LARGE_ALLOC) { + if (pool->head + bytes > pool->end) + refill_pool(pool); + + assert(pool->head + bytes <= pool->end); + + void * ptr = pool->head; + + pool->head += bytes; + pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + + return ptr; + } else { + struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); + + block->next = pool->blocks; + pool->blocks = block; + + return (block + 1); + } +} + + diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h new file mode 100644 index 0000000000..ce23c319ad --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h @@ -0,0 +1,49 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef MEMORY_POOL_H +#define MEMORY_POOL_H + +struct memory_block; + +/** + * Provides a pool of memory that can quickly be allocated from, at the + * cost of being unable to explicitly free one of the allocated blocks. + * Instead, the entire pool can be freed at once. + * + * The idea is to allow one to quickly allocate a flexible amount of + * memory during operations like shader compilation while avoiding + * reference counting headaches. + */ +struct memory_pool { + unsigned char * head; + unsigned char * end; + unsigned int total_allocated; + struct memory_block * blocks; +}; + + +void memory_pool_init(struct memory_pool * pool); +void memory_pool_destroy(struct memory_pool * pool); +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + +#endif /* MEMORY_POOL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c new file mode 100644 index 0000000000..6c9fba4914 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -0,0 +1,416 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_fragprog.h" + +#include "shader/prog_parameter.h" + +#include "../r300_reg.h" + +static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +{ + struct prog_src_register reg = { 0, }; + + reg.File = PROGRAM_STATE_VAR; + reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +GLboolean r300_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + + if (inst->I.Opcode != OPCODE_TEX && + inst->I.Opcode != OPCODE_TXB && + inst->I.Opcode != OPCODE_TXP && + inst->I.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->I.Opcode != OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + inst->I.Opcode = OPCODE_MOV; + + if (comparefunc == GL_ALWAYS) { + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + } + + return GL_TRUE; + } else { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, fail; + + inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + inst_cmp->I.DstReg = inst->I.DstReg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = rc_find_free_temporary(c); + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; + inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + else + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + + inst_cmp->I.Opcode = OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; + inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + } + } + + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) { + struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->I.Opcode = OPCODE_MUL; + inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit); + + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index; + } + + /* Cannot write texture to output registers or with masks */ + if (inst->I.Opcode != OPCODE_KIL && + (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg = inst->I.DstReg; + inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + } + + + /* Cannot read texture coordinate from constants file */ + if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + } + + return GL_TRUE; +} + +/* just some random things... */ +void r300FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r300_fragment_program_code *code = &c->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; + int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; + int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n, + alu_offset, tex_offset, alu_end, tex_end, code_addr); + + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { + fprintf(stderr, " TEX:\n"); + for (i = tex_offset; + i <= tex_offset + tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = alu_offset; + i <= alu_offset + alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + code->alu.inst[i].rgb_addr, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].alpha_addr); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].rgb_inst, arga[0], arga[1], + arga[2], code->alu.inst[i].alpha_inst); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h new file mode 100644 index 0000000000..0ac46dbd9c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "radeon_compiler.h" +#include "radeon_program.h" + + +extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c new file mode 100644 index 0000000000..305dc074ee --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs <darktama@iinet.net.au> + * + * \author Jerome Glisse <j.glisse@gmail.com> + * + * \todo FogOption + */ + +#include "r300_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" +#include "r300_fragprog_swizzle.h" + + +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + +#define PROG_CODE \ + struct r300_emit_state * emit = (struct r300_emit_state*)data; \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, GLuint index) +{ + if (index > code->pixsize) + code->pixsize = index; +} + + +static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTC_CMP; + case OPCODE_DP3: return R300_ALU_OUTC_DP3; + case OPCODE_DP4: return R300_ALU_OUTC_DP4; + case OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTC_MAD; + case OPCODE_MAX: return R300_ALU_OUTC_MAX; + case OPCODE_MIN: return R300_ALU_OUTC_MIN; + case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTA_CMP; + case OPCODE_DP3: return R300_ALU_OUTA_DP4; + case OPCODE_DP4: return R300_ALU_OUTA_DP4; + case OPCODE_EX2: return R300_ALU_OUTA_EX2; + case OPCODE_FRC: return R300_ALU_OUTA_FRC; + case OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTA_MAD; + case OPCODE_MAX: return R300_ALU_OUTA_MAX; + case OPCODE_MIN: return R300_ALU_OUTA_MIN; + case OPCODE_RCP: return R300_ALU_OUTA_RCP; + case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction. + */ +static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) +{ + PROG_CODE; + + if (code->alu.length >= R300_PFS_MAX_ALU_INST) { + error("Too many ALU instructions"); + return GL_FALSE; + } + + int ip = code->alu.length++; + int j; + + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { + GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); + if (!inst->RGB.Src[j].Constant) + use_temporary(code, inst->RGB.Src[j].Index); + code->alu.inst[ip].rgb_addr |= src << (6*j); + + src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); + if (!inst->Alpha.Src[j].Constant) + use_temporary(code, inst->Alpha.Src[j].Index); + code->alu.inst[ip].alpha_addr |= src << (6*j); + + GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].rgb_inst |= arg << (7*j); + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].alpha_inst |= arg << (7*j); + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + code->alu.inst[ip].rgb_addr |= + (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + emit->node_flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + code->alu.inst[ip].alpha_addr |= + (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT; + emit->node_flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = GL_TRUE; + } + + return GL_TRUE; +} + + +/** + * Finish the current node without advancing to the next one. + */ +static GLboolean finish_node(struct r300_emit_state * emit) +{ + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; + + if (code->alu.length == emit->node_first_alu) { + /* Generate a single NOP for this node */ + struct radeon_pair_instruction inst; + _mesa_bzero(&inst, sizeof(inst)); + if (!emit_alu(emit, &inst)) + return GL_FALSE; + } + + unsigned alu_offset = emit->node_first_alu; + unsigned alu_end = code->alu.length - alu_offset - 1; + unsigned tex_offset = emit->node_first_tex; + unsigned tex_end = code->tex.length - tex_offset - 1; + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); + return GL_FALSE; + } + + tex_end = 0; + } else { + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; + } + + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. */ + code->code_addr[emit->current_node] = + (alu_offset << R300_ALU_START_SHIFT) | + (alu_end << R300_ALU_SIZE_SHIFT) | + (tex_offset << R300_TEX_START_SHIFT) | + (tex_end << R300_TEX_SIZE_SHIFT) | + emit->node_flags; + + return GL_TRUE; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection. + */ +static GLboolean begin_tex(void* data) +{ + PROG_CODE; + + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return GL_TRUE; + } + + if (emit->current_node == 3) { + error("Too many texture indirections"); + return GL_FALSE; + } + + if (!finish_node(emit)) + return GL_FALSE; + + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; + return GL_TRUE; +} + + +static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst) +{ + PROG_CODE; + + if (code->tex.length >= R300_PFS_MAX_TEX_INST) { + error("Too many TEX instructions"); + return GL_FALSE; + } + + GLuint unit = inst->TexSrcUnit; + GLuint dest = inst->DestIndex; + GLuint opcode; + + switch(inst->Opcode) { + case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %i", inst->Opcode); + return GL_FALSE; + } + + if (inst->Opcode == RADEON_OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->SrcIndex); + + code->tex.inst[code->tex.length++] = + (inst->SrcIndex << R300_SRC_ADDR_SHIFT) | + (dest << R300_DST_ADDR_SHIFT) | + (unit << R300_TEX_ID_SHIFT) | + (opcode << R300_TEX_INST_SHIFT); + return GL_TRUE; +} + + +static const struct radeon_pair_handler pair_handler = { + .EmitPaired = &emit_alu, + .EmitTex = &emit_tex, + .BeginTexBlock = &begin_tex, + .MaxHwTemps = R300_PFS_NUM_TEMP_REGS +}; + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +{ + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; + + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); + + radeonPairProgram(compiler, &pair_handler, &emit); + if (compiler->Base.Error) + return; + + /* Finish the program */ + finish_node(&emit); + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + code->code_offset = + (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | + (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for(i = 0; i <= emit.current_node; ++i) + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c new file mode 100644 index 0000000000..1b14cc3888 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r300_fragprog_swizzle.h" + +#include "../r300_reg.h" +#include "radeon_nqssadce.h" +#include "radeon_compiler.h" + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) + +struct swizzle_data { + GLuint hash; /**< swizzle value this matches */ + GLuint base; /**< base value for hw swizzle */ + GLuint stride; /**< difference in base between arg0/1/2 */ +}; + +static const struct swizzle_data native_swizzles[] = { + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found. + */ +static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + GLuint swz = GET_SWZ(swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } + + return 0; +} + + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +{ + if (reg.Abs) + reg.Negate = NEGATE_NONE; + + if (opcode == OPCODE_KIL || + opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP) { + int j; + + if (reg.Abs || reg.Negate) + return GL_FALSE; + + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(reg.Swizzle, j); + if (swz == SWIZZLE_NIL) + continue; + if (swz != j) + return GL_FALSE; + } + + return GL_TRUE; + } + + GLuint relevant = 0; + int j; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) + relevant |= 1 << j; + + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return GL_FALSE; + + if (!lookup_native_swizzle(reg.Swizzle)) + return GL_FALSE; + + return GL_TRUE; +} + + +/** + * Generate MOV dst, src using only native swizzles. + */ +void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +{ + if (src.Abs) + src.Negate = NEGATE_NONE; + + while(dst.WriteMask) { + const struct swizzle_data *best_swizzle = 0; + GLuint best_matchcount = 0; + GLuint best_matchmask = 0; + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + GLuint matchcount = 0; + GLuint matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + if (!GET_BIT(dst.WriteMask, comp)) + continue; + GLuint swz = GET_SWZ(src.Swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_swizzle = sd; + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) + break; + } + } + + struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg = dst; + inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); + inst->I.SrcReg[0] = src; + inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; + /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ + + dst.WriteMask &= ~inst->I.DstReg.WriteMask; + } +} + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd) { + _mesa_printf("Not a native swizzle: %08x\n", swizzle); + return 0; + } + + return sd->base + src*sd->stride; +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) +{ + if (swizzle < 3) + return swizzle + 3*src; + + switch(swizzle) { + case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + default: return R300_ALU_ARGA_ONE; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h new file mode 100644 index 0000000000..231bf4eef5 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_SWIZZLE_H_ +#define __R300_FRAGPROG_SWIZZLE_H_ + +#include "main/glheader.h" +#include "shader/prog_instruction.h" + +struct nqssadce_state; + +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); +void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c new file mode 100644 index 0000000000..d39b82be71 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,149 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" + +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void nqssadce_init(struct nqssadce_state* s) +{ + struct r300_fragment_program_compiler * c = s->UserData; + s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW; + s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W; +} + +static void rewrite_depth_out(struct r300_fragment_program_compiler * c) +{ + struct rc_instruction *rci; + + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { + struct prog_instruction * inst = &rci->I; + + if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; + + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } + } +} + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + rewrite_depth_out(c); + + if (c->is_r500) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(&c->Base, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(&c->Base, 3, transformations); + } + + if (c->Base.Debug) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + rc_print_program(&c->Base.Program); + fflush(stdout); + } + + if (c->is_r500) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle + }; + radeonNqssaDce(&c->Base, &nqssadce, c); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle + }; + radeonNqssaDce(&c->Base, &nqssadce, c); + } + + if (c->Base.Debug) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + rc_print_program(&c->Base.Program); + fflush(stdout); + } + + if (c->is_r500) { + r500BuildFragmentProgramHwCode(c); + } else { + r300BuildFragmentProgramHwCode(c); + } + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); + + if (c->Base.Debug) { + if (c->is_r500) { + r500FragmentProgramDump(c->code); + } else { + r300FragmentProgramDump(c->code); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c new file mode 100644 index 0000000000..fc9c8f805a --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,615 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "../r300_reg.h" + +#include "radeon_nqssadce.h" +#include "radeon_program.h" +#include "radeon_program_alu.h" + +#include "shader/prog_print.h" + + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(vpi->SrcReg[x].File), \ + NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & WRITEMASK_XYZW; +} + +static unsigned long t_dst_class(gl_register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case PROGRAM_OUTPUT: + return PVS_DST_REG_OUT; + case PROGRAM_ADDRESS: + return PVS_DST_REG_A0; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(gl_register_file file) +{ + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case PROGRAM_INPUT: + return PVS_SRC_REG_INPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + return PVS_SRC_REG_CONSTANT; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b) +{ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return GL_FALSE; + if (aclass == PVS_SRC_REG_TEMPORARY) + return GL_FALSE; + + if (a.RelAddr || b.RelAddr) + return GL_TRUE; + if (a.Index != b.Index) + return GL_TRUE; + + return GL_FALSE; +} + +static INLINE unsigned long t_swizzle(GLubyte swizzle) +{ + /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + if (src->File == PROGRAM_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src->RelAddr << 4); +} + +static GLboolean valid_dst(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +static void ei_vector1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); +} + +static void ei_vector2(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); +} + +static void ei_math1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); +} + +static void ei_lit(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); +} + +static void ei_mad(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} + +static void ei_pow(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} + + +static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *rci; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + + compiler->SetHwInputOutput(compiler); + + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct prog_instruction *vpi = &rci->I; + GLuint *inst = compiler->code->body.d + compiler->code->length; + + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; + + if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } + + switch (vpi->Opcode) { + case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + default: + rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } +} + +struct temporary_allocation { + GLuint Allocated:1; + GLuint HwTemp:15; + struct rc_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *inst; + GLuint num_orig_temps = 0; + GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + struct temporary_allocation * ta; + GLuint i, j; + + compiler->code->num_temporaries = 0; + memset(hwtemps, 0, sizeof(hwtemps)); + + /* Pass 1: Count original temporaries and allocate structures */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + if (inst->I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->I.SrcReg[i].Index + 1; + } + } + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + if (inst->I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->I.DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) + ta[inst->I.SrcReg[i].Index].LastRead = inst; + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.SrcReg[i].Index; + inst->I.SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = GL_FALSE; + } + } + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + if (!hwtemps[j]) + break; + } + if (j >= VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Out of hw temporaries\n"); + } else { + ta[orig].Allocated = GL_TRUE; + ta[orig].HwTemp = j; + hwtemps[j] = GL_TRUE; + + if (j >= compiler->code->num_temporaries) + compiler->code->num_temporaries = j + 1; + } + } + + inst->I.DstReg.Index = ta[orig].HwTemp; + } + } + } +} + + +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. + */ +static GLboolean transform_source_conflicts( + struct radeon_compiler *c, + struct rc_instruction* inst, + void* unused) +{ + GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); + + if (num_operands == 3) { + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) + || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; + + reset_srcreg(&inst->I.SrcReg[2]); + inst->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[2].Index = tmpreg; + } + } + + if (num_operands >= 2) { + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; + + reset_srcreg(&inst->I.SrcReg[1]); + inst->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[1].Index = tmpreg; + } + } + + return GL_TRUE; +} + +static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) +{ + int i; + + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = i; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst->I.SrcReg[0].File = PROGRAM_CONSTANT; + inst->I.SrcReg[0].Index = 0; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + compiler->Base.Program.OutputsWritten |= 1 << i; + } + } +} + +static void nqssadceInit(struct nqssadce_state* s) +{ + struct r300_vertex_program_compiler * compiler = s->UserData; + int i; + + for(i = 0; i < VERT_RESULT_MAX; ++i) { + if (compiler->RequiredOutputs & (1 << i)) + s->Outputs[i].Sourced = WRITEMASK_XYZW; + } +} + +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +{ + (void) opcode; + (void) reg; + + return GL_TRUE; +} + + + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) +{ + addArtificialOutputs(compiler); + + { + struct radeon_program_transformation transformations[] = { + { &r300_transform_vertex_alu, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations); + } + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stdout); + } + + { + /* Note: This pass has to be done seperately from ALU rewrite, + * otherwise non-native ALU instructions with source conflits + * will not be treated properly. + */ + struct radeon_program_transformation transformations[] = { + { &transform_source_conflicts, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations); + } + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after source conflict resolve:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stdout); + } + + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(&compiler->Base, &nqssadce, compiler); + + /* We need this step for reusing temporary registers */ + allocate_temporary_registers(compiler); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stdout); + } + } + + translate_vertex_program(compiler); + + rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants); + + compiler->code->InputsRead = compiler->Base.Program.InputsRead; + compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; + + if (compiler->Base.Debug) { + printf("Final vertex program code:\n"); + r300_vertex_program_dump(compiler->code); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c new file mode 100644 index 0000000000..39cc6953ba --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -0,0 +1,177 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_code.h" + +#include <stdio.h> + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + + +static void r300_vs_op_dump(uint32_t op) +{ + printf(" dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if (op & 0x80) { + if (op & 0x1) { + printf("PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + printf(" PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + printf("%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + printf("%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +static void r300_vs_src_dump(uint32_t src) +{ + printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? "-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs) +{ + unsigned instrcount = vs->length / 4; + unsigned i; + + for(i = 0; i < instrcount; i++) { + unsigned offset = i*4; + unsigned src; + + printf("%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); + + for(src = 0; src < 3; ++src) { + printf(" src%i: 0x%08x", src, vs->body.d[offset+1+src]); + r300_vs_src_dump(vs->body.d[offset+1+src]); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c new file mode 100644 index 0000000000..7e2faed690 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -0,0 +1,449 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r500_fragprog.h" + +#include "../r300_reg.h" + +static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +{ + struct prog_src_register reg = { 0, }; + + reg.File = PROGRAM_STATE_VAR; + reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands + */ +GLboolean r500_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + + if (inst->I.Opcode != OPCODE_TEX && + inst->I.Opcode != OPCODE_TXB && + inst->I.Opcode != OPCODE_TXP && + inst->I.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->I.Opcode != OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + inst->I.Opcode = OPCODE_MOV; + + if (comparefunc == GL_ALWAYS) { + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + } + + return GL_TRUE; + } else { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, fail; + + inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + inst_cmp->I.DstReg = inst->I.DstReg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = rc_find_free_temporary(c); + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; + inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + else + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + + inst_cmp->I.Opcode = OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; + inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + } + } + + /* Cannot write texture to output registers */ + if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg = inst->I.DstReg; + inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + } + + /* Cannot read texture coordinate from constants file */ + if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + } + + return GL_TRUE; +} + +GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +{ + GLuint relevant; + int i; + + if (opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP || + opcode == OPCODE_KIL) { + if (reg.Abs) + return GL_FALSE; + + if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) + return GL_FALSE; + + if (reg.Negate) + reg.Negate ^= NEGATE_XYZW; + + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz == SWIZZLE_NIL) { + reg.Negate &= ~(1 << i); + continue; + } + if (swz >= 4) + return GL_FALSE; + } + + if (reg.Negate) + return GL_FALSE; + + return GL_TRUE; + } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... */ + if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) + return GL_TRUE; + + return GL_FALSE; + } else { + /* ALU instructions support almost everything */ + if (reg.Abs) + return GL_TRUE; + + relevant = 0; + for(i = 0; i < 3; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return GL_FALSE; + + return GL_TRUE; + } +} + +/** + * Implement a MOV with a potentially non-native swizzle. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. Therefore, we split the MOV into two instructions when necessary. + */ +void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +{ + GLuint negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(src.Swizzle, i); + if (swz == SWIZZLE_NIL) + continue; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; + } + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg = dst; + inst->I.DstReg.WriteMask = negatebase[i]; + inst->I.SrcReg[0] = src; + inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; + } +} + + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "1/2"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +void r500FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r500_fragment_program_code *code = &c->code.r500; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + int n; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case 0: + case 1: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case 2: + break; + case 3: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h new file mode 100644 index 0000000000..9091f65cd2 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "shader/prog_parameter.h" +#include "shader/prog_instruction.h" + +#include "radeon_compiler.h" +#include "radeon_nqssadce.h" + +extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); + +extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); + +extern GLboolean r500_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c new file mode 100644 index 0000000000..d694725c9b --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs <darktama@iinet.net.au> + * + * \author Jerome Glisse <j.glisse@gmail.com> + * + * \author Corbin Simpson <MostAwesomeDude@gmail.com> + * + * \todo Depth write, WPOS/FOGC inputs + * + * \todo FogOption + * + */ + +#include "r500_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" + + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r500_fragment_program_code *code = &c->code->code.r500 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALPHA_OP_CMP; + case OPCODE_COS: return R500_ALPHA_OP_COS; + case OPCODE_DDX: return R500_ALPHA_OP_MDH; + case OPCODE_DDY: return R500_ALPHA_OP_MDV; + case OPCODE_DP3: return R500_ALPHA_OP_DP; + case OPCODE_DP4: return R500_ALPHA_OP_DP; + case OPCODE_EX2: return R500_ALPHA_OP_EX2; + case OPCODE_FRC: return R500_ALPHA_OP_FRC; + case OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALPHA_OP_MAD; + case OPCODE_MAX: return R500_ALPHA_OP_MAX; + case OPCODE_MIN: return R500_ALPHA_OP_MIN; + case OPCODE_RCP: return R500_ALPHA_OP_RCP; + case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +static GLuint fix_hw_swizzle(GLuint swz) +{ + if (swz == 5) swz = 6; + if (swz == SWIZZLE_NIL) swz = 4; + return swz; +} + +static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +{ + GLuint t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +{ + GLuint t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +static void use_temporary(struct r500_fragment_program_code* code, GLuint index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ + if (!src.Constant) + use_temporary(code, src.Index); + return src.Index | src.Constant << 8; +} + + +/** + * Emit a paired ALU instruction. + */ +static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_alu: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + else + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->code->writes_depth = GL_TRUE; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + return GL_TRUE; +} + +static GLuint translate_strq_swizzle(GLuint swizzle) +{ + GLuint swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_tex: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case RADEON_OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case RADEON_OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case RADEON_OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case RADEON_OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + default: + error("emit_tex can't handle opcode %x\n", inst->Opcode); + } + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex) + | (translate_strq_swizzle(inst->SrcSwizzle) << 8) + | R500_TEX_DST_ADDR(inst->DestIndex) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + return GL_TRUE; +} + +static const struct radeon_pair_handler pair_handler = { + .EmitPaired = emit_paired, + .EmitTex = emit_tex, + .MaxHwTemps = 128 +}; + +void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +{ + struct r500_fragment_program_code *code = &compiler->code->code.r500; + + _mesa_bzero(code, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_end = -1; + + radeonPairProgram(compiler, &pair_handler, compiler); + if (compiler->Base.Error) + return; + + if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= 511) { + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; + } + + int ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c new file mode 100644 index 0000000000..c7923004df --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#include "radeon_code.h" + +void rc_constants_init(struct rc_constant_list * c) +{ + memset(c, 0, sizeof(*c)); +} + +/** + * Copy a constants structure, assuming that the destination structure + * is not initialized. + */ +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +{ + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; +} + +void rc_constants_destroy(struct rc_constant_list * c) +{ + free(c->Constants); + memset(c, 0, sizeof(*c)); +} + +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +{ + unsigned index = c->Count; + + if (c->Count >= c->_Reserved) { + struct rc_constant * newlist; + + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; + + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + + free(c->Constants); + c->Constants = newlist; + } + + c->Constants[index] = *constant; + c->Count++; + + return index; +} + + +/** + * Add a state vector to the constant list, while trying to avoid duplicates. + */ +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && + c->Constants[index].u.State[1] == state1) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.Size = 4; + constant.u.State[0] = state0; + constant.u.State[1] = state1; + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate vector to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate scalar to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +{ + unsigned index; + int free_index = -1; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + return index; + } + } + + if (c->Constants[index].Size < 4) + free_index = index; + } + } + + if (free_index >= 0) { + unsigned comp = c->Constants[free_index].Size++; + c->Constants[free_index].u.Immediate[comp] = data; + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + return free_index; + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 1; + constant.u.Immediate[0] = data; + *swizzle = SWIZZLE_XXXX; + + return rc_constants_add(c, &constant); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h new file mode 100644 index 0000000000..0806fb1b5c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -0,0 +1,206 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_CODE_H +#define RADEON_CODE_H + +#include <stdint.h> + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) + +enum { + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, + + RC_CONSTANT_IMMEDIATE, + + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. + */ + RC_CONSTANT_STATE +}; + +enum { + RC_STATE_SHADOW_AMBIENT = 0, + + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR +}; + +struct rc_constant { + unsigned Type:2; /**< RC_CONSTANT_xxx */ + unsigned Size:3; + + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; +}; + +struct rc_constant_list { + struct rc_constant * Constants; + unsigned Count; + + unsigned _Reserved; +}; + +void rc_constants_init(struct rc_constant_list * c); +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); +void rc_constants_destroy(struct rc_constant_list * c); +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + unsigned depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + unsigned texture_compare_func : 3; + } unit[16]; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + int length; /**< total # of texture instructions used */ + uint32_t inst[R300_PFS_MAX_TEX_INST]; + } tex; + + struct { + int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + } inst[R300_PFS_MAX_ALU_INST]; + } alu; + + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ +}; + + +struct r500_fragment_program_code { + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + + int max_temp_idx; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + unsigned writes_depth:1; + + struct rc_constant_list constants; +}; + + +#define VSF_MAX_FRAGMENT_LENGTH (255*4) +#define VSF_MAX_FRAGMENT_TEMPS (14) + +#define VSF_MAX_INPUTS 32 +#define VSF_MAX_OUTPUTS 32 + +struct r300_vertex_program_code { + int length; + union { + uint32_t d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + + struct rc_constant_list constants; + + uint32_t InputsRead; + uint32_t OutputsWritten; +}; + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs); + +#endif /* RADEON_CODE_H */
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c new file mode 100644 index 0000000000..da950d5289 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -0,0 +1,262 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdarg.h> + +#include "radeon_program.h" + + +void rc_init(struct radeon_compiler * c) +{ + memset(c, 0, sizeof(*c)); + + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.I.Opcode = OPCODE_END; +} + +void rc_destroy(struct radeon_compiler * c) +{ + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); +} + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + if (!c->Debug) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void rc_error(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + c->Error = GL_TRUE; + + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; + + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); + + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } + + if (c->Debug) { + fprintf(stderr, "r300compiler error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + +/** + * Rewrite the program such that everything that source the given input + * register will source new_input instead. + */ +void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input) +{ + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << input); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + unsigned i; + + for(i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) { + inst->I.SrcReg[i].File = new_input.File; + inst->I.SrcReg[i].Index = new_input.Index; + inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle); + if (!inst->I.SrcReg[i].Abs) { + inst->I.SrcReg[i].Negate ^= new_input.Negate; + inst->I.SrcReg[i].Abs = new_input.Abs; + } + + c->Program.InputsRead |= 1 << new_input.Index; + } + } + } +} + + +/** + * Rewrite the program such that everything that writes into the given + * output register will instead write to new_output. The new_output + * writemask is honoured. + */ +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) +{ + struct rc_instruction * inst; + + c->Program.OutputsWritten &= ~(1 << output); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { + inst->I.DstReg.Index = new_output; + inst->I.DstReg.WriteMask &= writemask; + + c->Program.OutputsWritten |= 1 << new_output; + } + } + } +} + + +/** + * Rewrite the program such that a given output is duplicated. + */ +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +{ + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = tempreg; + } + } + } + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = output; + + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = tempreg; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = dup_output; + + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = tempreg; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + c->Program.OutputsWritten |= 1 << dup_output; +} + + +/** + * Introduce standard code fragment to deal with fragment.position. + */ +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input) +{ + unsigned tempregi = rc_find_free_temporary(c); + + c->Program.InputsRead &= ~(1 << wpos); + c->Program.InputsRead |= 1 << new_input; + + /* perspective divide */ + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->I.Opcode = OPCODE_RCP; + + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = tempregi; + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + + inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT; + inst_rcp->I.SrcReg[0].Index = new_input; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->I.Opcode = OPCODE_MUL; + + inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.DstReg.Index = tempregi; + inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ; + + inst_mul->I.SrcReg[0].File = PROGRAM_INPUT; + inst_mul->I.SrcReg[0].Index = new_input; + + inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mul->I.SrcReg[1].Index = tempregi; + inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + + /* viewport transformation */ + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->I.Opcode = OPCODE_MAD; + + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = tempregi; + inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ; + + inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[0].Index = tempregi; + inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index; + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + struct rc_instruction * inst; + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + unsigned i; + + for(i = 0; i < numsrcs; i++) { + if (inst->I.SrcReg[i].File == PROGRAM_INPUT && + inst->I.SrcReg[i].Index == wpos) { + inst->I.SrcReg[i].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[i].Index = tempregi; + } + } + } +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h new file mode 100644 index 0000000000..e63ab8840a --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -0,0 +1,108 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#include "memory_pool.h" +#include "radeon_code.h" + + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + struct prog_instruction I; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; +}; + +struct radeon_compiler { + struct memory_pool Pool; + struct rc_program Program; + unsigned Debug:1; + unsigned Error:1; + char * ErrorMsg; +}; + +void rc_init(struct radeon_compiler * c); +void rc_destroy(struct radeon_compiler * c); + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...); +void rc_error(struct radeon_compiler * c, const char * fmt, ...); + +void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program); + +void rc_calculate_inputs_outputs(struct radeon_compiler * c); + +void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input); +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input); + +struct r300_fragment_program_compiler { + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + struct r300_fragment_program_external_state state; + unsigned is_r500; + unsigned OutputDepth; + unsigned OutputColor; + + void * UserData; + void (*AllocateHwInputs)( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata); +}; + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + GLbitfield RequiredOutputs; + + void * UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c new file mode 100644 index 0000000000..aaaa50ad1f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c @@ -0,0 +1,294 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * "Not-quite SSA" and Dead-Code Elimination. + * + * @note This code uses SWIZZLE_NIL in a source register to indicate that + * the corresponding component is ignored by the corresponding instruction. + */ + +#include "radeon_nqssadce.h" + +#include "radeon_compiler.h" + + +/** + * Return the @ref register_state for the given register (or 0 for untracked + * registers, i.e. constants). + */ +static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_OUTPUT: return &s->Outputs[index]; + case PROGRAM_ADDRESS: return &s->Address; + default: return 0; + } +} + + +/** + * Left multiplication of a register with a swizzle + * + * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. + */ +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) +{ + struct prog_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = NEGATE_NONE; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + + +static void track_used_srcreg(struct nqssadce_state* s, + GLint src, GLuint sourced) +{ + struct prog_instruction * inst = &s->IP->I; + int i; + GLuint deswz_source = 0; + + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) { + GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); + deswz_source |= 1 << swz; + } else { + inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + } + + if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { + struct prog_dst_register dstreg = inst->DstReg; + dstreg.File = PROGRAM_TEMPORARY; + dstreg.Index = rc_find_free_temporary(s->Compiler); + dstreg.WriteMask = sourced; + + s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); + + inst->SrcReg[src].File = PROGRAM_TEMPORARY; + inst->SrcReg[src].Index = dstreg.Index; + inst->SrcReg[src].Swizzle = 0; + inst->SrcReg[src].Negate = NEGATE_NONE; + inst->SrcReg[src].Abs = 0; + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) + inst->SrcReg[src].Swizzle |= i << (3*i); + else + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + deswz_source = sourced; + } + + struct register_state *regstate; + + if (inst->SrcReg[src].RelAddr) { + regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); + if (regstate) + regstate->Sourced |= WRITEMASK_X; + } else { + regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + if (regstate) + regstate->Sourced |= deswz_source & 0xf; + } +} + +static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex) +{ + int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode); + int i; + for(i = 0; i < nsrc; ++i) + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex) + inst->I.SrcReg[i].Index = newindex; +} + +static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) +{ + GLuint newindex = rc_find_free_temporary(s->Compiler); + struct rc_instruction * inst; + for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex) + inst->I.DstReg.Index = newindex; + unalias_srcregs(inst, oldindex, newindex); + } + unalias_srcregs(s->IP, oldindex, newindex); +} + + +/** + * Handle one instruction. + */ +static void process_instruction(struct nqssadce_state* s) +{ + struct prog_instruction *inst = &s->IP->I; + GLuint WriteMask; + + if (inst->Opcode == OPCODE_END) + return; + + if (inst->Opcode != OPCODE_KIL) { + struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); + if (!regstate) { + rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n", + inst->DstReg.File, inst->DstReg.Index); + return; + } + + inst->DstReg.WriteMask &= regstate->Sourced; + regstate->Sourced &= ~inst->DstReg.WriteMask; + + if (inst->DstReg.WriteMask == 0) { + struct rc_instruction * inst_remove = s->IP; + s->IP = s->IP->Prev; + rc_remove_instruction(inst_remove); + return; + } + + if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) + unalias_temporary(s, inst->DstReg.Index); + } + + WriteMask = inst->DstReg.WriteMask; + + switch (inst->Opcode) { + case OPCODE_ARL: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MOV: + track_used_srcreg(s, 0, WriteMask); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + case OPCODE_SGE: + case OPCODE_SLT: + track_used_srcreg(s, 0, WriteMask); + track_used_srcreg(s, 1, WriteMask); + break; + case OPCODE_CMP: + case OPCODE_MAD: + track_used_srcreg(s, 0, WriteMask); + track_used_srcreg(s, 1, WriteMask); + track_used_srcreg(s, 2, WriteMask); + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + track_used_srcreg(s, 0, 0x1); + break; + case OPCODE_DP3: + track_used_srcreg(s, 0, 0x7); + track_used_srcreg(s, 1, 0x7); + break; + case OPCODE_DP4: + track_used_srcreg(s, 0, 0xf); + track_used_srcreg(s, 1, 0xf); + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + track_used_srcreg(s, 0, 0xf); + break; + case OPCODE_DST: + track_used_srcreg(s, 0, 0x6); + track_used_srcreg(s, 1, 0xa); + break; + case OPCODE_EXP: + case OPCODE_LOG: + case OPCODE_POW: + track_used_srcreg(s, 0, 0x3); + break; + case OPCODE_LIT: + track_used_srcreg(s, 0, 0xb); + break; + default: + rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode); + return; + } + + s->IP = s->IP->Prev; +} + +void rc_calculate_inputs_outputs(struct radeon_compiler * c) +{ + struct rc_instruction *inst; + + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) + { + int i; + int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode); + + for (i = 0; i < num_src_regs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_INPUT) + c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; + } + + if (_mesa_num_inst_dst_regs(inst->I.Opcode)) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; + } + } +} + +void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data) +{ + struct nqssadce_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Compiler = c; + s.Descr = descr; + s.UserData = data; + s.Descr->Init(&s); + s.IP = c->Program.Instructions.Prev; + + while(s.IP != &c->Program.Instructions && !c->Error) + process_instruction(&s); + + rc_calculate_inputs_outputs(c); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h new file mode 100644 index 0000000000..b3fc77a35a --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_NQSSADCE_H_ +#define __RADEON_PROGRAM_NQSSADCE_H_ + +#include "radeon_program.h" + +struct register_state { + /** + * Bitmask indicating which components of the register are sourced + * by later instructions. + */ + GLuint Sourced : 4; +}; + +/** + * Maintain state such as which registers are used, which registers are + * read from, etc. + */ +struct nqssadce_state { + struct radeon_compiler *Compiler; + struct radeon_nqssadce_descr *Descr; + + /** + * All instructions after this instruction pointer have been dealt with. + */ + struct rc_instruction * IP; + + /** + * Which registers are read by subsequent instructions? + */ + struct register_state Temps[MAX_PROGRAM_TEMPS]; + struct register_state Outputs[VERT_RESULT_MAX]; + struct register_state Address; + + void * UserData; +}; + + +/** + * This structure contains a description of the hardware in-so-far as + * it is required for the NqSSA-DCE pass. + */ +struct radeon_nqssadce_descr { + /** + * Fill in which outputs + */ + void (*Init)(struct nqssadce_state *); + + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + */ + GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); + + /** + * Emit (at the current IP) the instruction MOV dst, src; + * The transformation will work recursively on the emitted instruction(s). + */ + void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); +}; + +void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data); +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); + +#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c new file mode 100644 index 0000000000..208d3b90c8 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include "radeon_compiler.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void radeonLocalTransform( + struct radeon_compiler * c, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + + while(inst != &c->Program.Instructions) { + struct rc_instruction * current = inst; + int i; + + inst = inst->Next; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(c, current, t->userData)) + break; + } + } +} + + +GLint rc_find_free_temporary(struct radeon_compiler * c) +{ + GLboolean used[MAX_PROGRAM_TEMPS]; + GLuint i; + + memset(used, 0, sizeof(used)); + + for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { + const struct prog_instruction *inst = &rcinst->I; + const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); + GLuint k; + + for (k = 0; k < n; k++) { + if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) + used[inst->SrcReg[k].Index] = GL_TRUE; + } + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!used[i]) + return i; + } + + return -1; +} + + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) +{ + struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); + + inst->Prev = 0; + inst->Next = 0; + + _mesa_init_instructions(&inst->I, 1); + + return inst; +} + + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + inst->Prev = after; + inst->Next = after->Next; + + inst->Prev->Next = inst; + inst->Next->Prev = inst; + + return inst; +} + +void rc_remove_instruction(struct rc_instruction * inst) +{ + inst->Prev->Next = inst->Next; + inst->Next->Prev = inst->Prev; +} + + +void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program) +{ + struct prog_instruction *source; + unsigned int i; + + for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) { + struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + dest->I = *source; + } + + c->Program.ShadowSamplers = program->ShadowSamplers; + c->Program.InputsRead = program->InputsRead; + c->Program.OutputsWritten = program->OutputsWritten; + + for(i = 0; i < program->Parameters->NumParameters; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } +} + + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog) +{ + GLuint indent = 0; + GLuint linenum = 1; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + /* Massive hack: We rely on the fact that the printers do not actually + * use the gl_program argument (last argument) in debug mode */ + indent = _mesa_fprint_instruction_opt( + stderr, &inst->I, + indent, PROG_PRINT_DEBUG, 0); + + linenum++; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h new file mode 100644 index 0000000000..561958608c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_program; + +enum { + PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +}; + +enum { + OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ +}; + +#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) +#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) + +static inline GLuint get_swz(GLuint swz, GLuint idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) +{ + GLuint ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +static inline GLuint combine_swizzles(GLuint src, GLuint swz) +{ + GLuint ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; + + return ret; +} + +static INLINE void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + + +/** + * A transformation that can be passed to \ref radeonLocalTransform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + GLboolean (*function)( + struct radeon_compiler*, + struct rc_instruction*, + void*); + void *userData; +}; + +void radeonLocalTransform( + struct radeon_compiler *c, + int num_transformations, + struct radeon_program_transformation* transformations); + +GLint rc_find_free_temporary(struct radeon_compiler * c); + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_remove_instruction(struct rc_instruction * inst); + +void rc_print_program(const struct rc_program *prog); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c new file mode 100644 index 0000000000..609e510ff2 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -0,0 +1,693 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + +#include "radeon_compiler.h" + + +static struct rc_instruction *emit1( + struct radeon_compiler * c, struct rc_instruction * after, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg; + return fpi; +} + +static struct rc_instruction *emit2( + struct radeon_compiler * c, struct rc_instruction * after, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg0; + fpi->I.SrcReg[1] = SrcReg1; + return fpi; +} + +static struct rc_instruction *emit3( + struct radeon_compiler * c, struct rc_instruction * after, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, + struct prog_src_register SrcReg2) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg0; + fpi->I.SrcReg[1] = SrcReg1; + fpi->I.SrcReg[2] = SrcReg2; + return fpi; +} + +static struct prog_dst_register dstreg(int file, int index) +{ + struct prog_dst_register dst; + dst.File = file; + dst.Index = index; + dst.WriteMask = WRITEMASK_XYZW; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static struct prog_dst_register dstregtmpmask(int index, int mask) +{ + struct prog_dst_register dst; + dst.File = PROGRAM_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static const struct prog_src_register builtin_zero = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_0000 +}; +static const struct prog_src_register builtin_one = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_1111 +}; +static const struct prog_src_register srcreg_undefined = { + .File = PROGRAM_UNDEFINED, + .Index = 0, + .Swizzle = SWIZZLE_NOOP +}; + +static struct prog_src_register srcreg(int file, int index) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct prog_src_register srcregswz(int file, int index, int swz) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + +static struct prog_src_register absolute(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.Abs = 1; + newreg.Negate = NEGATE_NONE; + return newreg; +} + +static struct prog_src_register negate(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ NEGATE_XYZW; + return newreg; +} + +static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +{ + struct prog_src_register swizzled = reg; + swizzled.Swizzle = MAKE_SWIZZLE4( + x >= 4 ? x : GET_SWZ(reg.Swizzle, x), + y >= 4 ? y : GET_SWZ(reg.Swizzle, y), + z >= 4 ? z : GET_SWZ(reg.Swizzle, z), + w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); + return swizzled; +} + +static struct prog_src_register scalar(struct prog_src_register reg) +{ + return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); +} + +static void transform_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct prog_src_register src = inst->I.SrcReg[0]; + src.Abs = 1; + src.Negate = NEGATE_NONE; + emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src); + rc_remove_instruction(inst); +} + +static void transform_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct prog_src_register src0 = inst->I.SrcReg[0]; + struct prog_src_register src1 = inst->I.SrcReg[1]; + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~NEGATE_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_DPH(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct prog_src_register src0 = inst->I.SrcReg[0]; + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ONE << (3 * 3); + emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]); + rc_remove_instruction(inst); +} + +/** + * [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. + */ +static void transform_DST(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), + swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + rc_remove_instruction(inst); +} + +static void transform_FLR(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]); + emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg, + inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + rc_remove_instruction(inst); +} + +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + GLuint constant; + GLuint constant_swizzle; + GLuint temp; + struct prog_src_register srctemp; + + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); + + if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) { + struct rc_instruction * inst_mov; + + inst_mov = emit1(c, inst, + OPCODE_MOV, 0, inst->I.DstReg, + srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c))); + + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + } + + temp = inst->I.DstReg.Index; + srctemp = srcreg(PROGRAM_TEMPORARY, temp); + + // tmp.x = max(0.0, Src.x); + // tmp.y = max(0.0, Src.y); + // tmp.w = clamp(Src.z, -128+eps, 128-eps); + emit2(c, inst->Prev, OPCODE_MAX, 0, + dstregtmpmask(temp, WRITEMASK_XYW), + inst->I.SrcReg[0], + swizzle(srcreg(PROGRAM_CONSTANT, constant), + SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); + emit2(c, inst->Prev, OPCODE_MIN, 0, + dstregtmpmask(temp, WRITEMASK_Z), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + + // tmp.w = Pow(tmp.y, tmp.w) + emit1(c, inst->Prev, OPCODE_LG2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit2(c, inst->Prev, OPCODE_MUL, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); + emit1(c, inst->Prev, OPCODE_EX2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + + // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, + dstregtmpmask(temp, WRITEMASK_Z), + negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + builtin_zero); + + // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 + emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, + dstregtmpmask(temp, WRITEMASK_XYW), + swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + + rc_remove_instruction(inst); +} + +static void transform_LRP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, OPCODE_ADD, 0, + dstreg(PROGRAM_TEMPORARY, tempreg), + inst->I.SrcReg[1], negate(inst->I.SrcReg[2])); + emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, + inst->I.DstReg, + inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]); + + rc_remove_instruction(inst); +} + +static void transform_POW(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); + struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); + tempdst.WriteMask = WRITEMASK_W; + tempsrc.Swizzle = SWIZZLE_WWWW; + + emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0])); + emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1])); + emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc); + + rc_remove_instruction(inst); +} + +static void transform_RSQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]); +} + +static void transform_SGE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SLT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SUB(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->I.Opcode = OPCODE_ADD; + inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]); +} + +static void transform_SWZ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->I.Opcode = OPCODE_MOV; +} + +static void transform_XPD(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + + rc_remove_instruction(inst); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. + */ +GLboolean radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->I.Opcode) { + case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; + case OPCODE_DST: transform_DST(c, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; + case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(c, inst); return GL_TRUE; + case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} + + +static void transform_r300_vertex_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Note: r500 can take absolute values, but r300 cannot. */ + inst->I.Opcode = OPCODE_MAX; + inst->I.SrcReg[1] = inst->I.SrcReg[0]; + inst->I.SrcReg[1].Negate ^= NEGATE_XYZW; +} + +/** + * For use with radeonLocalTransform, this transforms non-native ALU + * instructions of the r300 up to r500 vertex engine. + */ +GLboolean r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->I.Opcode) { + case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE; + case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} + +static void sincos_constants(struct radeon_compiler* c, GLuint *constants) +{ + static const GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.5, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } + }; + int i; + + for(i = 0; i < 2; ++i) + constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). + * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx( + struct radeon_compiler* c, struct rc_instruction * after, + struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) +{ + GLuint tempreg = rc_find_free_temporary(c); + + emit2(c, after->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcreg(PROGRAM_CONSTANT, constants[0])); + emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); + emit3(c, after->Prev, OPCODE_MAD, 0, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->I.Opcode != OPCODE_COS && + inst->I.Opcode != OPCODE_SIN && + inst->I.Opcode != OPCODE_SCS) + return GL_FALSE; + + GLuint constants[2]; + GLuint tempreg = rc_find_free_temporary(c); + + sincos_constants(c, constants); + + if (inst->I.Opcode == OPCODE_COS) { + // MAD tmp.x, src, 1/(2*PI), 0.75 + // FRC tmp.x, tmp.x + // MAD tmp.z, tmp.x, 2*PI, -PI + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(c, inst->Prev, inst->I.DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else if (inst->I.Opcode == OPCODE_SIN) { + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(c, inst->Prev, inst->I.DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else { + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg)); + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + struct prog_dst_register dst = inst->I.DstReg; + + dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X; + sin_approx(c, inst->Prev, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + constants); + + dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y; + sin_approx(c, inst->Prev, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + constants); + } + + rc_remove_instruction(inst); + + return GL_TRUE; +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +GLboolean radeonTransformTrigScale(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->I.Opcode != OPCODE_COS && + inst->I.Opcode != OPCODE_SIN && + inst->I.Opcode != OPCODE_SCS) + return GL_FALSE; + + static const GLfloat RCP_2PI = 0.15915494309189535; + GLuint temp; + GLuint constant; + GLuint constant_swizzle; + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); + + emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), + srcreg(PROGRAM_TEMPORARY, temp)); + + if (inst->I.Opcode == OPCODE_COS) { + emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->I.Opcode == OPCODE_SIN) { + emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, + inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->I.Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = inst->I.DstReg; + + if (inst->I.DstReg.WriteMask & WRITEMASK_X) { + moddst.WriteMask = WRITEMASK_X; + emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + if (inst->I.DstReg.WriteMask & WRITEMASK_Y) { + moddst.WriteMask = WRITEMASK_Y; + emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + } + + rc_remove_instruction(inst); + + return GL_TRUE; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! + */ + +GLboolean radeonTransformDeriv(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY) + return GL_FALSE; + + inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); + inst->I.SrcReg[1].Negate = NEGATE_XYZW; + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h new file mode 100644 index 0000000000..147efec6fc --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +GLboolean radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +GLboolean r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +GLboolean radeonTransformTrigSimple( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +GLboolean radeonTransformTrigScale( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +GLboolean radeonTransformDeriv( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c new file mode 100644 index 0000000000..48616ac461 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -0,0 +1,958 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Perform temporary register allocation and attempt to pair off instructions + * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction + * vs. ALU instruction scheduling. + */ + +#include "radeon_program_pair.h" + +#include "memory_pool.h" +#include "radeon_compiler.h" +#include "shader/prog_print.h" + +#define error(fmt, args...) do { \ + rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ +} while(0) + +struct pair_state_instruction { + struct prog_instruction Instruction; + GLuint IP; /**< Position of this instruction in original program */ + + GLuint IsTex:1; /**< Is a texture instruction */ + GLuint NeedRGB:1; /**< Needs the RGB ALU */ + GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ + GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + GLuint NumDependencies:5; + + /** + * Next instruction in the linked list of ready instructions. + */ + struct pair_state_instruction *NextReady; + + /** + * Values that this instruction writes + */ + struct reg_value *Values[4]; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + struct pair_state_instruction *Reader; + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * PROGRAM_TEMPORARY. + */ +struct reg_value { + struct pair_state_instruction *Writer; + struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ + + /** + * Unordered linked list of instructions that read from this value. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is calculated during @ref scan_instructions + * and continually decremented during code emission. + * When this count reaches zero, the instruction that writes the @ref Next value + * can be scheduled. + */ + GLuint NumReaders; +}; + +/** + * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register + * to the proper hardware temporary. + */ +struct pair_register_translation { + GLuint Allocated:1; + GLuint HwIndex:8; + GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ + + /** + * Notes the value that is currently contained in each component + * (only used for PROGRAM_TEMPORARY registers). + */ + struct reg_value *Value[4]; +}; + +struct pair_state { + struct r300_fragment_program_compiler * Compiler; + const struct radeon_pair_handler *Handler; + GLboolean Verbose; + void *UserData; + + /** + * Translate Mesa registers to hardware registers + */ + struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; + struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; + + struct { + GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ + } HwTemps[128]; + + /** + * Linked list of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + struct pair_state_instruction *ReadyFullALU; + struct pair_state_instruction *ReadyRGB; + struct pair_state_instruction *ReadyAlpha; + struct pair_state_instruction *ReadyTEX; +}; + + +static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_INPUT: return &s->Inputs[index]; + default: return 0; + } +} + +static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) +{ + struct pair_register_translation *t = get_register(s, file, index); + ASSERT(!s->HwTemps[hwindex].RefCount); + ASSERT(!t->Allocated); + s->HwTemps[hwindex].RefCount = t->RefCount; + t->Allocated = 1; + t->HwIndex = hwindex; +} + +static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) +{ + GLuint hwindex; + + struct pair_register_translation *t = get_register(s, file, index); + if (!t) { + error("get_hw_reg: %i[%i]\n", file, index); + return 0; + } + + if (t->Allocated) + return t->HwIndex; + + for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) + if (!s->HwTemps[hwindex].RefCount) + break; + + if (hwindex >= s->Handler->MaxHwTemps) { + error("Ran out of hardware temporaries"); + return 0; + } + + alloc_hw_reg(s, file, index, hwindex); + return hwindex; +} + + +static void deref_hw_reg(struct pair_state *s, GLuint hwindex) +{ + if (!s->HwTemps[hwindex].RefCount) { + error("Hwindex %i refcount error", hwindex); + return; + } + + s->HwTemps[hwindex].RefCount--; +} + +static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) +{ + pairinst->NextReady = *list; + *list = pairinst; +} + +/** + * The given instruction has become ready. Link it into the ready + * instructions. + */ +static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst) +{ + if (s->Verbose) + _mesa_printf("instruction_ready(%i)\n", pairinst->IP); + + if (pairinst->IsTex) + add_pairinst_to_list(&s->ReadyTEX, pairinst); + else if (!pairinst->NeedAlpha) + add_pairinst_to_list(&s->ReadyRGB, pairinst); + else if (!pairinst->NeedRGB) + add_pairinst_to_list(&s->ReadyAlpha, pairinst); + else + add_pairinst_to_list(&s->ReadyFullALU, pairinst); +} + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) +{ + struct prog_src_register tmp; + + switch(inst->Opcode) { + case OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[1].Negate = NEGATE_NONE; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_MUL: + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct pair_state *s, + struct pair_state_instruction *psi) +{ + psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; + psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; + + switch(psi->Instruction.Opcode) { + case OPCODE_ADD: + case OPCODE_CMP: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MAD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MOV: + case OPCODE_MUL: + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + psi->IsTranscendent = 1; + psi->NeedAlpha = 1; + break; + case OPCODE_DP4: + psi->NeedAlpha = 1; + /* fall through */ + case OPCODE_DP3: + psi->NeedRGB = 1; + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + case OPCODE_END: + psi->IsTex = 1; + break; + default: + error("Unknown opcode %d\n", psi->Instruction.Opcode); + break; + } +} + + +/** + * Count which (input, temporary) register is read and written how often, + * and scan the instruction stream to find dependencies. + */ +static void scan_instructions(struct pair_state *s) +{ + struct rc_instruction *source; + GLuint ip; + + for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0; + source != &s->Compiler->Base.Program.Instructions; + source = source->Next, ++ip) { + struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst)); + memset(pairinst, 0, sizeof(struct pair_state_instruction)); + + pairinst->Instruction = source->I; + pairinst->IP = ip; + final_rewrite(s, &pairinst->Instruction); + classify_instruction(s, pairinst); + + int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode); + int j; + for(j = 0; j < nsrc; j++) { + struct pair_register_translation *t = + get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index); + if (!t) + continue; + + t->RefCount++; + + if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) { + int i; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i); + if (swz >= 4) + continue; /* constant or NIL swizzle */ + if (!t->Value[swz]) + continue; /* this is an undefined read */ + + /* Do not add a dependency if this instruction + * also rewrites the value. The code below adds + * a dependency for the DstReg, which is a superset + * of the SrcReg dependency. */ + if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY && + pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index && + GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz)) + continue; + + struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r)); + pairinst->NumDependencies++; + t->Value[swz]->NumReaders++; + r->Reader = pairinst; + r->Next = t->Value[swz]->Readers; + t->Value[swz]->Readers = r; + } + } + } + + int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode); + if (ndst) { + struct pair_register_translation *t = + get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index); + if (t) { + t->RefCount++; + + if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) { + int j; + for(j = 0; j < 4; ++j) { + if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j)) + continue; + + struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v)); + memset(v, 0, sizeof(struct reg_value)); + v->Writer = pairinst; + if (t->Value[j]) { + pairinst->NumDependencies++; + t->Value[j]->Next = v; + } + t->Value[j] = v; + pairinst->Values[j] = v; + } + } + } + } + + if (s->Verbose) + _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); + + if (!pairinst->NumDependencies) + instruction_ready(s, pairinst); + } + + /* Clear the PROGRAM_TEMPORARY state */ + int i, j; + for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { + for(j = 0; j < 4; ++j) + s->Temps[i].Value[j] = 0; + } +} + + +static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst) +{ + ASSERT(pairinst->NumDependencies > 0); + if (!--pairinst->NumDependencies) + instruction_ready(s, pairinst); +} + +/** + * Update the dependency tracking state based on what the instruction + * at the given IP does. + */ +static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst) +{ + struct prog_instruction *inst = &pairinst->Instruction; + + if (s->Verbose) + _mesa_printf("commit_instruction(%i)\n", pairinst->IP); + + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; + deref_hw_reg(s, t->HwIndex); + + int i; + for(i = 0; i < 4; ++i) { + if (!GET_BIT(inst->DstReg.WriteMask, i)) + continue; + + t->Value[i] = pairinst->Values[i]; + if (t->Value[i]->NumReaders) { + struct reg_value_reader *r; + for(r = pairinst->Values[i]->Readers; r; r = r->Next) + decrement_dependencies(s, r->Reader); + } else if (t->Value[i]->Next) { + /* This happens when the only reader writes + * the register at the same time */ + decrement_dependencies(s, t->Value[i]->Next->Writer); + } + } + } + + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; i++) { + struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (!t) + continue; + + deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); + + if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) + continue; + + int j; + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz >= 4) + continue; + if (!t->Value[swz]) + continue; + + /* Do not free a dependency if this instruction + * also rewrites the value. See scan_instructions. */ + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == inst->SrcReg[i].Index && + GET_BIT(inst->DstReg.WriteMask, swz)) + continue; + + if (!--t->Value[swz]->NumReaders) { + if (t->Value[swz]->Next) + decrement_dependencies(s, t->Value[swz]->Next->Writer); + } + } + } +} + + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + * + * In R500, we don't really know when the result of a texture instruction + * arrives. So allocate all destinations first, to make sure they do not + * arrive early and overwrite a texture coordinate we're going to use later + * in the block. + */ +static void emit_all_tex(struct pair_state *s) +{ + struct pair_state_instruction *readytex; + struct pair_state_instruction *pairinst; + + ASSERT(s->ReadyTEX); + + // Don't let the ready list change under us! + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + + // Allocate destination hardware registers in one block to avoid conflicts. + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + struct prog_instruction *inst = &pairinst->Instruction; + if (inst->Opcode != OPCODE_KIL) + get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + } + + if (s->Compiler->Base.Debug) + _mesa_printf(" BEGIN_TEX\n"); + + if (s->Handler->BeginTexBlock) + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData); + + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + struct prog_instruction *inst = &pairinst->Instruction; + commit_instruction(s, pairinst); + + if (inst->Opcode != OPCODE_KIL) + inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); + + if (s->Compiler->Base.Debug) { + _mesa_printf(" "); + _mesa_print_instruction(inst); + fflush(stdout); + } + + struct radeon_pair_texture_instruction rpti; + + switch(inst->Opcode) { + case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break; + case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break; + case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break; + default: + case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break; + } + + rpti.DestIndex = inst->DstReg.Index; + rpti.WriteMask = inst->DstReg.WriteMask; + rpti.TexSrcUnit = inst->TexSrcUnit; + rpti.TexSrcTarget = inst->TexSrcTarget; + rpti.SrcIndex = inst->SrcReg[0].Index; + rpti.SrcSwizzle = inst->SrcReg[0].Swizzle; + + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti); + } + + if (s->Compiler->Base.Debug) + _mesa_printf(" END_TEX\n"); +} + + +static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, + struct prog_src_register src, GLboolean rgb, GLboolean alpha) +{ + int candidate = -1; + int candidate_quality = -1; + int i; + + if (!rgb && !alpha) + return 0; + + GLuint constant; + GLuint index; + + if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { + constant = 0; + index = get_hw_reg(s, src.File, src.Index); + } else { + constant = 1; + index = src.Index; + } + + for(i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].Constant != constant || + pair->RGB.Src[i].Index != index) + continue; + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].Constant != constant || + pair->Alpha.Src[i].Index != index) + continue; + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } + + if (candidate >= 0) { + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].Constant = constant; + pair->RGB.Src[candidate].Index = index; + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].Constant = constant; + pair->Alpha.Src[candidate].Index = index; + } + } + + return candidate; +} + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static GLboolean fill_instruction_into_pair( + struct pair_state *s, + struct radeon_pair_instruction *pair, + struct pair_state_instruction *pairinst) +{ + struct prog_instruction *inst = &pairinst->Instruction; + + ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); + ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); + + if (pairinst->NeedRGB) { + if (pairinst->IsTranscendent) + pair->RGB.Opcode = OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (pairinst->NeedAlpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + int nargs = _mesa_num_inst_src_regs(inst->Opcode); + int i; + + /* Special case for DDX/DDY (MDH/MDV). */ + if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { + if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) + return GL_FALSE; + else + nargs++; + } + + for(i = 0; i < nargs; ++i) { + int source; + if (pairinst->NeedRGB && !pairinst->IsTranscendent) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + int j; + for(j = 0; j < 3; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + } + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); + } + if (pairinst->NeedAlpha) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); + } + } + + return GL_TRUE; +} + + +/** + * Fill in the destination register information. + * + * This is split from filling in source registers because we want + * to avoid allocating hardware temporaries for destinations until + * we are absolutely certain that we're going to emit a certain + * instruction pairing. + */ +static void fill_dest_into_pair( + struct pair_state *s, + struct radeon_pair_instruction *pair, + struct pair_state_instruction *pairinst) +{ + struct prog_instruction *inst = &pairinst->Instruction; + + if (inst->DstReg.File == PROGRAM_OUTPUT) { + if (inst->DstReg.Index == s->Compiler->OutputColor) { + pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else if (inst->DstReg.Index == s->Compiler->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } else { + GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + if (pairinst->NeedRGB) { + pair->RGB.DestIndex = hwindex; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + } + if (pairinst->NeedAlpha) { + pair->Alpha.DestIndex = hwindex; + pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } +} + + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_alu(struct pair_state *s) +{ + struct radeon_pair_instruction pair; + struct pair_state_instruction *psi; + + if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { + if (s->ReadyFullALU) { + psi = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + } else if (s->ReadyRGB) { + psi = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else { + psi = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } + + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, psi); + fill_dest_into_pair(s, &pair, psi); + commit_instruction(s, psi); + } else { + struct pair_state_instruction **prgb; + struct pair_state_instruction **palpha; + + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + struct pair_state_instruction * psirgb = *prgb; + struct pair_state_instruction * psialpha = *palpha; + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, psirgb); + if (!fill_instruction_into_pair(s, &pair, psialpha)) + continue; + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + fill_dest_into_pair(s, &pair, psirgb); + fill_dest_into_pair(s, &pair, psialpha); + commit_instruction(s, psirgb); + commit_instruction(s, psialpha); + goto success; + } + } + + /* No success in pairing; just take the first RGB instruction */ + psi = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, psi); + fill_dest_into_pair(s, &pair, psi); + commit_instruction(s, psi); + success: ; + } + + if (s->Compiler->Base.Debug) + radeonPrintPairInstruction(&pair); + + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair); +} + +/* Callback function for assigning input registers to hardware registers */ +static void alloc_helper(void * data, unsigned input, unsigned hwreg) +{ + struct pair_state * s = data; + alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg); +} + +void radeonPairProgram( + struct r300_fragment_program_compiler * compiler, + const struct radeon_pair_handler* handler, void *userdata) +{ + struct pair_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Compiler = compiler; + s.Handler = handler; + s.UserData = userdata; + s.Verbose = GL_FALSE && s.Compiler->Base.Debug; + + if (s.Compiler->Base.Debug) + _mesa_printf("Emit paired program\n"); + + scan_instructions(&s); + s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s); + + while(!s.Compiler->Base.Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s); + + while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) + emit_alu(&s); + } + + if (s.Compiler->Base.Debug) + _mesa_printf(" END\n"); +} + + +static void print_pair_src(int i, struct radeon_pair_instruction_source* src) +{ + _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); +} + +static const char* opcode_string(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return "SOP"; + else + return _mesa_opcode_string(opcode); +} + +static int num_pairinst_args(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return 0; + else + return _mesa_num_inst_src_regs(opcode); +} + +static char swizzle_char(GLuint swz) +{ + switch(swz) { + case SWIZZLE_X: return 'x'; + case SWIZZLE_Y: return 'y'; + case SWIZZLE_Z: return 'z'; + case SWIZZLE_W: return 'w'; + case SWIZZLE_ZERO: return '0'; + case SWIZZLE_ONE: return '1'; + case SWIZZLE_NIL: return '_'; + default: return '?'; + } +} + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) +{ + int nargs; + int i; + + _mesa_printf(" RGB: "); + for(i = 0; i < 3; ++i) { + if (inst->RGB.Src[i].Used) + print_pair_src(i, inst->RGB.Src + i); + } + _mesa_printf("\n"); + _mesa_printf(" Alpha:"); + for(i = 0; i < 3; ++i) { + if (inst->Alpha.Src[i].Used) + print_pair_src(i, inst->Alpha.Src + i); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + _mesa_printf(" COLOR.%s%s%s", + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + nargs = num_pairinst_args(inst->RGB.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), + abs); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + _mesa_printf(" COLOR.w"); + if (inst->Alpha.DepthWriteMask) + _mesa_printf(" DEPTH.w"); + nargs = num_pairinst_args(inst->Alpha.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, + swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); + } + _mesa_printf("\n"); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h new file mode 100644 index 0000000000..ff76178551 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_PAIR_H_ +#define __RADEON_PROGRAM_PAIR_H_ + +#include "radeon_program.h" + +struct r300_fragment_program_compiler; + + +/** + * Represents a paired instruction, as found in R300 and R500 + * fragment programs. + */ +struct radeon_pair_instruction_source { + GLuint Index:8; + GLuint Constant:1; + GLuint Used:1; +}; + +struct radeon_pair_instruction_rgb { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:3; + GLuint OutputWriteMask:3; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:9; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction_alpha { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:1; + GLuint OutputWriteMask:1; + GLuint DepthWriteMask:1; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:3; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction { + struct radeon_pair_instruction_rgb RGB; + struct radeon_pair_instruction_alpha Alpha; +}; + + +enum { + RADEON_OPCODE_TEX = 0, + RADEON_OPCODE_TXB, + RADEON_OPCODE_TXP, + RADEON_OPCODE_KIL +}; + +struct radeon_pair_texture_instruction { + GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */ + + GLuint DestIndex:8; + GLuint WriteMask:4; + + GLuint TexSrcUnit:5; + GLuint TexSrcTarget:3; + + GLuint SrcIndex:8; + GLuint SrcSwizzle:12; +}; + + +/** + * + */ +struct radeon_pair_handler { + /** + * Write a paired instruction to the hardware. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); + + /** + * Write a texture instruction to the hardware. + * Register indices have already been rewritten to the allocated + * hardware register numbers. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*); + + /** + * Called before a block of contiguous, independent texture + * instructions is emitted. + */ + GLboolean (*BeginTexBlock)(void*); + + unsigned MaxHwTemps; +}; + +void radeonPairProgram( + struct r300_fragment_program_compiler * compiler, + const struct radeon_pair_handler*, void *userdata); + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); + +#endif /* __RADEON_PROGRAM_PAIR_H_ */ |