summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile86
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript41
-rw-r--r--src/mesa/drivers/dri/r300/compiler/memory_pool.c97
-rw-r--r--src/mesa/drivers/dri/r300/compiler/memory_pool.h80
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.c256
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.h44
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c380
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c228
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c209
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c651
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c180
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c344
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h50
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c495
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.c173
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h265
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c309
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h117
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c281
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h70
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c284
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c102
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c331
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h30
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c474
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c472
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h246
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c446
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c280
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c501
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c256
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.c183
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h233
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c975
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h63
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h150
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c84
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h126
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_print.c300
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c389
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h39
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h57
44 files changed, 10427 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
new file mode 100644
index 0000000000..ff3801dc67
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -0,0 +1,86 @@
+# src/mesa/drivers/dri/r300/compiler/Makefile
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r300compiler
+
+C_SOURCES = \
+ radeon_code.c \
+ radeon_compiler.c \
+ radeon_emulate_branches.c \
+ radeon_emulate_loops.c \
+ radeon_program.c \
+ radeon_program_print.c \
+ radeon_opcodes.c \
+ radeon_program_alu.c \
+ radeon_program_pair.c \
+ radeon_program_tex.c \
+ radeon_pair_translate.c \
+ radeon_pair_schedule.c \
+ radeon_pair_regalloc.c \
+ radeon_dataflow.c \
+ radeon_dataflow_deadcode.c \
+ radeon_dataflow_swizzles.c \
+ radeon_optimize.c \
+ r3xx_fragprog.c \
+ r300_fragprog.c \
+ r300_fragprog_swizzle.c \
+ r300_fragprog_emit.c \
+ r500_fragprog.c \
+ r500_fragprog_emit.c \
+ r3xx_vertprog.c \
+ r3xx_vertprog_dump.c \
+ \
+ memory_pool.c
+
+
+### Basic defines ###
+
+OBJECTS = $(C_SOURCES:.c=.o) \
+ $(CPP_SOURCES:.cpp=.o) \
+ $(ASM_SOURCES:.S=.o)
+
+INCLUDES = \
+ -I. \
+ -I$(TOP)/include \
+ -I$(TOP)/src/mesa \
+
+
+##### TARGETS #####
+
+default: depend lib$(LIBNAME).a
+
+lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current
+ $(MKLIB) -o $(LIBNAME) -static $(OBJECTS)
+
+depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
+ rm -f depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null
+
+# Emacs tags
+tags:
+ etags `find . -name \*.[ch]` `find ../include`
+
+# Remove .o and backup files
+clean:
+ rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak
+
+# Dummy target
+install:
+ @echo -n ""
+
+##### RULES #####
+
+.c.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.cpp.o:
+ $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.S.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+
+sinclude depend
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
new file mode 100755
index 0000000000..50d9cdb7f2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -0,0 +1,41 @@
+Import('*')
+
+env = env.Clone()
+env.Append(CPPPATH = '#/include')
+env.Append(CPPPATH = '#/src/mesa')
+
+# temporary fix
+env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '')
+
+r300compiler = env.ConvenienceLibrary(
+ target = 'r300compiler',
+ source = [
+ 'radeon_code.c',
+ 'radeon_compiler.c',
+ 'radeon_program.c',
+ 'radeon_program_print.c',
+ 'radeon_opcodes.c',
+ 'radeon_program_alu.c',
+ 'radeon_program_pair.c',
+ 'radeon_program_tex.c',
+ 'radeon_pair_translate.c',
+ 'radeon_pair_schedule.c',
+ 'radeon_pair_regalloc.c',
+ 'radeon_optimize.c',
+ 'radeon_emulate_branches.c',
+ 'radeon_emulate_loops.c',
+ 'radeon_dataflow.c',
+ 'radeon_dataflow_deadcode.c',
+ 'radeon_dataflow_swizzles.c',
+ 'r3xx_fragprog.c',
+ 'r300_fragprog.c',
+ 'r300_fragprog_swizzle.c',
+ 'r300_fragprog_emit.c',
+ 'r500_fragprog.c',
+ 'r500_fragprog_emit.c',
+ 'r3xx_vertprog.c',
+ 'r3xx_vertprog_dump.c',
+ 'memory_pool.c',
+ ])
+
+Return('r300compiler')
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
new file mode 100644
index 0000000000..76c7c60d8f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "memory_pool.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define POOL_LARGE_ALLOC 4096
+#define POOL_ALIGN 4
+
+
+struct memory_block {
+ struct memory_block * next;
+};
+
+void memory_pool_init(struct memory_pool * pool)
+{
+ memset(pool, 0, sizeof(struct memory_pool));
+}
+
+
+void memory_pool_destroy(struct memory_pool * pool)
+{
+ while(pool->blocks) {
+ struct memory_block * block = pool->blocks;
+ pool->blocks = block->next;
+ free(block);
+ }
+}
+
+static void refill_pool(struct memory_pool * pool)
+{
+ unsigned int blocksize = pool->total_allocated;
+ struct memory_block * newblock;
+
+ if (!blocksize)
+ blocksize = 2*POOL_LARGE_ALLOC;
+
+ newblock = (struct memory_block*)malloc(blocksize);
+ newblock->next = pool->blocks;
+ pool->blocks = newblock;
+
+ pool->head = (unsigned char*)(newblock + 1);
+ pool->end = ((unsigned char*)newblock) + blocksize;
+ pool->total_allocated += blocksize;
+}
+
+
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
+{
+ if (bytes < POOL_LARGE_ALLOC) {
+ void * ptr;
+
+ if (pool->head + bytes > pool->end)
+ refill_pool(pool);
+
+ assert(pool->head + bytes <= pool->end);
+
+ ptr = pool->head;
+
+ pool->head += bytes;
+ pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1));
+
+ return ptr;
+ } else {
+ struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block));
+
+ block->next = pool->blocks;
+ pool->blocks = block;
+
+ return (block + 1);
+ }
+}
+
+
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
new file mode 100644
index 0000000000..42344d0e3b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef MEMORY_POOL_H
+#define MEMORY_POOL_H
+
+struct memory_block;
+
+/**
+ * Provides a pool of memory that can quickly be allocated from, at the
+ * cost of being unable to explicitly free one of the allocated blocks.
+ * Instead, the entire pool can be freed at once.
+ *
+ * The idea is to allow one to quickly allocate a flexible amount of
+ * memory during operations like shader compilation while avoiding
+ * reference counting headaches.
+ */
+struct memory_pool {
+ unsigned char * head;
+ unsigned char * end;
+ unsigned int total_allocated;
+ struct memory_block * blocks;
+};
+
+
+void memory_pool_init(struct memory_pool * pool);
+void memory_pool_destroy(struct memory_pool * pool);
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+
+/**
+ * Generic helper for growing an array that has separate size/count
+ * and reserved counters to accomodate up to num new element.
+ *
+ * type * Array;
+ * unsigned int Size;
+ * unsigned int Reserved;
+ *
+ * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
+ * assert(Size + k < Reserved);
+ *
+ * \note Size is not changed by this macro.
+ *
+ * \warning Array, Size, Reserved have to be lvalues and may be evaluated
+ * several times.
+ */
+#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
+ unsigned int _num = (num); \
+ if ((size) + _num > (reserved)) { \
+ unsigned int newreserve = (reserved) * 2; \
+ type * newarray; \
+ if (newreserve < _num) \
+ newreserve = 4 * _num; /* arbitrary heuristic */ \
+ newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
+ memcpy(newarray, (array), (size) * sizeof(type)); \
+ (array) = newarray; \
+ (reserved) = newreserve; \
+ } \
+} while(0)
+
+#endif /* MEMORY_POOL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
new file mode 100644
index 0000000000..794db8335a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r300_fragprog.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+/* just some random things... */
+void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+ struct r300_fragment_program_code *code = &c->code.r300;
+ int n, i, j;
+ static int pc = 0;
+
+ fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+ fprintf(stderr, "Hardware program\n");
+ fprintf(stderr, "----------------\n");
+
+ for (n = 0; n <= (code->config & 3); n++) {
+ uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
+ int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
+ int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
+ int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
+ int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
+
+ fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
+ "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n,
+ alu_offset, tex_offset, alu_end, tex_end, code_addr);
+
+ if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
+ fprintf(stderr, " TEX:\n");
+ for (i = tex_offset;
+ i <= tex_offset + tex_end;
+ ++i) {
+ const char *instr;
+
+ switch ((code->tex.
+ inst[i] >> R300_TEX_INST_SHIFT) &
+ 15) {
+ case R300_TEX_OP_LD:
+ instr = "TEX";
+ break;
+ case R300_TEX_OP_KIL:
+ instr = "KIL";
+ break;
+ case R300_TEX_OP_TXP:
+ instr = "TXP";
+ break;
+ case R300_TEX_OP_TXB:
+ instr = "TXB";
+ break;
+ default:
+ instr = "UNKNOWN";
+ }
+
+ fprintf(stderr,
+ " %s t%i, %c%i, texture[%i] (%08x)\n",
+ instr,
+ (code->tex.
+ inst[i] >> R300_DST_ADDR_SHIFT) & 31,
+ 't',
+ (code->tex.
+ inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+ (code->tex.
+ inst[i] & R300_TEX_ID_MASK) >>
+ R300_TEX_ID_SHIFT,
+ code->tex.inst[i]);
+ }
+ }
+
+ for (i = alu_offset;
+ i <= alu_offset + alu_end; ++i) {
+ char srcc[3][10], dstc[20];
+ char srca[3][10], dsta[20];
+ char argc[3][20];
+ char arga[3][20];
+ char flags[5], tmp[10];
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].rgb_addr >> (j * 6);
+ int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+
+ sprintf(srcc[j], "%c%i",
+ (regc & 32) ? 'c' : 't', regc & 31);
+ sprintf(srca[j], "%c%i",
+ (rega & 32) ? 'c' : 't', rega & 31);
+ }
+
+ dstc[0] = 0;
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(dstc, "t%i.%s ",
+ (code->alu.inst[i].
+ rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+ flags);
+ }
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(tmp, "o%i.%s",
+ (code->alu.inst[i].
+ rgb_addr >> 29) & 3,
+ flags);
+ strcat(dstc, tmp);
+ }
+
+ dsta[0] = 0;
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+ sprintf(dsta, "t%i.w ",
+ (code->alu.inst[i].
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+ }
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
+ sprintf(tmp, "o%i.w ",
+ (code->alu.inst[i].
+ alpha_addr >> 25) & 3);
+ strcat(dsta, tmp);
+ }
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
+ strcat(dsta, "Z");
+ }
+
+ fprintf(stderr,
+ "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
+ " w: %3s %3s %3s -> %-20s (%08x)\n", i,
+ srcc[0], srcc[1], srcc[2], dstc,
+ code->alu.inst[i].rgb_addr, srca[0], srca[1],
+ srca[2], dsta, code->alu.inst[i].alpha_addr);
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].rgb_inst >> (j * 7);
+ int rega = code->alu.inst[i].alpha_inst >> (j * 7);
+ int d;
+ char buf[20];
+
+ d = regc & 31;
+ if (d < 12) {
+ switch (d % 4) {
+ case R300_ALU_ARGC_SRC0C_XYZ:
+ sprintf(buf, "%s.xyz",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_XXX:
+ sprintf(buf, "%s.xxx",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_YYY:
+ sprintf(buf, "%s.yyy",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_ZZZ:
+ sprintf(buf, "%s.zzz",
+ srcc[d / 4]);
+ break;
+ }
+ } else if (d < 15) {
+ sprintf(buf, "%s.www", srca[d - 12]);
+ } else if (d == 20) {
+ sprintf(buf, "0.0");
+ } else if (d == 21) {
+ sprintf(buf, "1.0");
+ } else if (d == 22) {
+ sprintf(buf, "0.5");
+ } else if (d >= 23 && d < 32) {
+ d -= 23;
+ switch (d / 3) {
+ case 0:
+ sprintf(buf, "%s.yzx",
+ srcc[d % 3]);
+ break;
+ case 1:
+ sprintf(buf, "%s.zxy",
+ srcc[d % 3]);
+ break;
+ case 2:
+ sprintf(buf, "%s.Wzy",
+ srcc[d % 3]);
+ break;
+ }
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(argc[j], "%s%s%s%s",
+ (regc & 32) ? "-" : "",
+ (regc & 64) ? "|" : "",
+ buf, (regc & 64) ? "|" : "");
+
+ d = rega & 31;
+ if (d < 9) {
+ sprintf(buf, "%s.%c", srcc[d / 3],
+ 'x' + (char)(d % 3));
+ } else if (d < 12) {
+ sprintf(buf, "%s.w", srca[d - 9]);
+ } else if (d == 16) {
+ sprintf(buf, "0.0");
+ } else if (d == 17) {
+ sprintf(buf, "1.0");
+ } else if (d == 18) {
+ sprintf(buf, "0.5");
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(arga[j], "%s%s%s%s",
+ (rega & 32) ? "-" : "",
+ (rega & 64) ? "|" : "",
+ buf, (rega & 64) ? "|" : "");
+ }
+
+ fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
+ " w: %8s %8s %8s op: %08x\n",
+ argc[0], argc[1], argc[2],
+ code->alu.inst[i].rgb_inst, arga[0], arga[1],
+ arga[2], code->alu.inst[i].alpha_inst);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
new file mode 100644
index 0000000000..8b755703be
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+
+extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+
+extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
new file mode 100644
index 0000000000..b27a683c39
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Emit the r300_fragment_program_code that can be understood by the hardware.
+ * Input is a pre-transformed radeon_program.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \todo FogOption
+ */
+
+#include "r300_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
+
+
+struct r300_emit_state {
+ struct r300_fragment_program_compiler * compiler;
+
+ unsigned current_node : 2;
+ unsigned node_first_tex : 8;
+ unsigned node_first_alu : 8;
+ uint32_t node_flags;
+};
+
+#define PROG_CODE \
+ struct r300_fragment_program_compiler *c = emit->compiler; \
+ struct r300_fragment_program_code *code = &c->code->code.r300
+
+#define error(fmt, args...) do { \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ } while(0)
+
+
+/**
+ * Mark a temporary register as used.
+ */
+static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
+{
+ if (index > code->pixsize)
+ code->pixsize = index;
+}
+
+static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+ if (src.File == RC_FILE_CONSTANT) {
+ return src.Index | (1 << 5);
+ } else if (src.File == RC_FILE_TEMPORARY) {
+ use_temporary(code, src.Index);
+ return src.Index;
+ }
+
+ return 0;
+}
+
+
+static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
+ case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
+ case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
+ case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
+ default:
+ error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+ /* fall through */
+ case RC_OPCODE_NOP:
+ /* fall through */
+ case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
+ case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
+ case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
+ case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+ }
+}
+
+static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+ case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
+ case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
+ case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
+ case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
+ case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
+ default:
+ error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+ /* fall through */
+ case RC_OPCODE_NOP:
+ /* fall through */
+ case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
+ case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
+ case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
+ case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
+ case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+ }
+}
+
+/**
+ * Emit one paired ALU instruction.
+ */
+static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
+{
+ PROG_CODE;
+
+ if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
+ error("Too many ALU instructions");
+ return 0;
+ }
+
+ int ip = code->alu.length++;
+ int j;
+
+ code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
+ code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
+
+ for(j = 0; j < 3; ++j) {
+ unsigned int src = use_source(code, inst->RGB.Src[j]);
+ code->alu.inst[ip].rgb_addr |= src << (6*j);
+
+ src = use_source(code, inst->Alpha.Src[j]);
+ code->alu.inst[ip].alpha_addr |= src << (6*j);
+
+ unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+ arg |= inst->RGB.Arg[j].Abs << 6;
+ arg |= inst->RGB.Arg[j].Negate << 5;
+ code->alu.inst[ip].rgb_inst |= arg << (7*j);
+
+ arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
+ arg |= inst->Alpha.Arg[j].Abs << 6;
+ arg |= inst->Alpha.Arg[j].Negate << 5;
+ code->alu.inst[ip].alpha_inst |= arg << (7*j);
+ }
+
+ if (inst->RGB.Saturate)
+ code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
+ if (inst->Alpha.Saturate)
+ code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
+
+ if (inst->RGB.WriteMask) {
+ use_temporary(code, inst->RGB.DestIndex);
+ code->alu.inst[ip].rgb_addr |=
+ (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
+ (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
+ }
+ if (inst->RGB.OutputWriteMask) {
+ code->alu.inst[ip].rgb_addr |=
+ (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
+ R300_RGB_TARGET(inst->RGB.Target);
+ emit->node_flags |= R300_RGBA_OUT;
+ }
+
+ if (inst->Alpha.WriteMask) {
+ use_temporary(code, inst->Alpha.DestIndex);
+ code->alu.inst[ip].alpha_addr |=
+ (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
+ R300_ALU_DSTA_REG;
+ }
+ if (inst->Alpha.OutputWriteMask) {
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
+ R300_ALPHA_TARGET(inst->Alpha.Target);
+ emit->node_flags |= R300_RGBA_OUT;
+ }
+ if (inst->Alpha.DepthWriteMask) {
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+ emit->node_flags |= R300_W_OUT;
+ c->code->writes_depth = 1;
+ }
+
+ return 1;
+}
+
+
+/**
+ * Finish the current node without advancing to the next one.
+ */
+static int finish_node(struct r300_emit_state * emit)
+{
+ struct r300_fragment_program_compiler * c = emit->compiler;
+ struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
+
+ if (code->alu.length == emit->node_first_alu) {
+ /* Generate a single NOP for this node */
+ struct rc_pair_instruction inst;
+ memset(&inst, 0, sizeof(inst));
+ if (!emit_alu(emit, &inst))
+ return 0;
+ }
+
+ unsigned alu_offset = emit->node_first_alu;
+ unsigned alu_end = code->alu.length - alu_offset - 1;
+ unsigned tex_offset = emit->node_first_tex;
+ unsigned tex_end = code->tex.length - tex_offset - 1;
+
+ if (code->tex.length == emit->node_first_tex) {
+ if (emit->current_node > 0) {
+ error("Node %i has no TEX instructions", emit->current_node);
+ return 0;
+ }
+
+ tex_end = 0;
+ } else {
+ if (emit->current_node == 0)
+ code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
+ }
+
+ /* Write the config register.
+ * Note: The order in which the words for each node are written
+ * is not correct here and needs to be fixed up once we're entirely
+ * done
+ *
+ * Also note that the register specification from AMD is slightly
+ * incorrect in its description of this register. */
+ code->code_addr[emit->current_node] =
+ (alu_offset << R300_ALU_START_SHIFT) |
+ (alu_end << R300_ALU_SIZE_SHIFT) |
+ (tex_offset << R300_TEX_START_SHIFT) |
+ (tex_end << R300_TEX_SIZE_SHIFT) |
+ emit->node_flags;
+
+ return 1;
+}
+
+
+/**
+ * Begin a block of texture instructions.
+ * Create the necessary indirection.
+ */
+static int begin_tex(struct r300_emit_state * emit)
+{
+ PROG_CODE;
+
+ if (code->alu.length == emit->node_first_alu &&
+ code->tex.length == emit->node_first_tex) {
+ return 1;
+ }
+
+ if (emit->current_node == 3) {
+ error("Too many texture indirections");
+ return 0;
+ }
+
+ if (!finish_node(emit))
+ return 0;
+
+ emit->current_node++;
+ emit->node_first_tex = code->tex.length;
+ emit->node_first_alu = code->alu.length;
+ emit->node_flags = 0;
+ return 1;
+}
+
+
+static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
+{
+ PROG_CODE;
+
+ if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
+ error("Too many TEX instructions");
+ return 0;
+ }
+
+ unsigned int unit = inst->U.I.TexSrcUnit;
+ unsigned int dest = inst->U.I.DstReg.Index;
+ unsigned int opcode;
+
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+ case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+ case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+ case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+ default:
+ error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
+ return 0;
+ }
+
+ if (inst->U.I.Opcode == RC_OPCODE_KIL) {
+ unit = 0;
+ dest = 0;
+ } else {
+ use_temporary(code, dest);
+ }
+
+ use_temporary(code, inst->U.I.SrcReg[0].Index);
+
+ code->tex.inst[code->tex.length++] =
+ (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
+ (dest << R300_DST_ADDR_SHIFT) |
+ (unit << R300_TEX_ID_SHIFT) |
+ (opcode << R300_TEX_INST_SHIFT);
+ return 1;
+}
+
+
+/**
+ * Final compilation step: Turn the intermediate radeon_program into
+ * machine-readable instructions.
+ */
+void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+{
+ struct r300_emit_state emit;
+ struct r300_fragment_program_code *code = &compiler->code->code.r300;
+
+ memset(&emit, 0, sizeof(emit));
+ emit.compiler = compiler;
+
+ memset(code, 0, sizeof(struct r300_fragment_program_code));
+
+ for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+ begin_tex(&emit);
+ continue;
+ }
+
+ emit_tex(&emit, inst);
+ } else {
+ emit_alu(&emit, &inst->U.P);
+ }
+ }
+
+ if (code->pixsize >= compiler->Base.max_temp_regs)
+ rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
+
+ if (compiler->Base.Error)
+ return;
+
+ /* Finish the program */
+ finish_node(&emit);
+
+ code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+ code->code_offset =
+ (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
+ ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
+ (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
+ ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+
+ if (emit.current_node < 3) {
+ int shift = 3 - emit.current_node;
+ int i;
+ for(i = emit.current_node; i >= 0; --i)
+ code->code_addr[shift + i] = code->code_addr[i];
+ for(i = 0; i < shift; ++i)
+ code->code_addr[i] = 0;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
new file mode 100644
index 0000000000..5d5de2f1b2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ * Utilities to deal with the somewhat odd restriction on R300 fragment
+ * program swizzles.
+ */
+
+#include "r300_fragprog_swizzle.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+#include "radeon_compiler.h"
+
+#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
+
+struct swizzle_data {
+ unsigned int hash; /**< swizzle value this matches */
+ unsigned int base; /**< base value for hw swizzle */
+ unsigned int stride; /**< difference in base between arg0/1/2 */
+};
+
+static const struct swizzle_data native_swizzles[] = {
+ {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
+ {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
+ {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
+ {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
+ {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
+ {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
+ {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
+ {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
+ {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
+ {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0},
+ {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0}
+};
+
+static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
+
+
+/**
+ * Find a native RGB swizzle that matches the given swizzle.
+ * Returns 0 if none found.
+ */
+static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
+{
+ int i, comp;
+
+ for(i = 0; i < num_native_swizzles; ++i) {
+ const struct swizzle_data* sd = &native_swizzles[i];
+ for(comp = 0; comp < 3; ++comp) {
+ unsigned int swz = GET_SWZ(swizzle, comp);
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+ if (swz != GET_SWZ(sd->hash, comp))
+ break;
+ }
+ if (comp == 3)
+ return sd;
+ }
+
+ return 0;
+}
+
+
+/**
+ * Check whether the given instruction supports the swizzle and negate
+ * combinations in the given source register.
+ */
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+ if (reg.Abs)
+ reg.Negate = RC_MASK_NONE;
+
+ if (opcode == RC_OPCODE_KIL ||
+ opcode == RC_OPCODE_TEX ||
+ opcode == RC_OPCODE_TXB ||
+ opcode == RC_OPCODE_TXP) {
+ int j;
+
+ if (reg.Abs || reg.Negate)
+ return 0;
+
+ for(j = 0; j < 4; ++j) {
+ unsigned int swz = GET_SWZ(reg.Swizzle, j);
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+ if (swz != j)
+ return 0;
+ }
+
+ return 1;
+ }
+
+ unsigned int relevant = 0;
+ int j;
+
+ for(j = 0; j < 3; ++j)
+ if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
+ relevant |= 1 << j;
+
+ if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+ return 0;
+
+ if (!lookup_native_swizzle(reg.Swizzle))
+ return 0;
+
+ return 1;
+}
+
+
+static void r300_swizzle_split(
+ struct rc_src_register src, unsigned int mask,
+ struct rc_swizzle_split * split)
+{
+ if (src.Abs)
+ src.Negate = RC_MASK_NONE;
+
+ split->NumPhases = 0;
+
+ while(mask) {
+ const struct swizzle_data *best_swizzle = 0;
+ unsigned int best_matchcount = 0;
+ unsigned int best_matchmask = 0;
+ int i, comp;
+
+ for(i = 0; i < num_native_swizzles; ++i) {
+ const struct swizzle_data *sd = &native_swizzles[i];
+ unsigned int matchcount = 0;
+ unsigned int matchmask = 0;
+ for(comp = 0; comp < 3; ++comp) {
+ if (!GET_BIT(mask, comp))
+ continue;
+ unsigned int swz = GET_SWZ(src.Swizzle, comp);
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+ if (swz == GET_SWZ(sd->hash, comp)) {
+ /* check if the negate bit of current component
+ * is the same for already matched components */
+ if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
+ continue;
+
+ matchcount++;
+ matchmask |= 1 << comp;
+ }
+ }
+ if (matchcount > best_matchcount) {
+ best_swizzle = sd;
+ best_matchcount = matchcount;
+ best_matchmask = matchmask;
+ if (matchmask == (mask & RC_MASK_XYZ))
+ break;
+ }
+ }
+
+ if (mask & RC_MASK_W)
+ best_matchmask |= RC_MASK_W;
+
+ split->Phase[split->NumPhases++] = best_matchmask;
+ mask &= ~best_matchmask;
+ }
+}
+
+struct rc_swizzle_caps r300_swizzle_caps = {
+ .IsNative = r300_swizzle_is_native,
+ .Split = r300_swizzle_split
+};
+
+
+/**
+ * Translate an RGB (XYZ) swizzle into the hardware code for the given
+ * instruction source.
+ */
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
+{
+ const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
+
+ if (!sd) {
+ fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
+ return 0;
+ }
+
+ return sd->base + src*sd->stride;
+}
+
+
+/**
+ * Translate an Alpha (W) swizzle into the hardware code for the given
+ * instruction source.
+ */
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
+{
+ if (swizzle < 3)
+ return swizzle + 3*src;
+
+ switch(swizzle) {
+ case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
+ case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
+ case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+ case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF;
+ default: return R300_ALU_ARGA_ONE;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
new file mode 100644
index 0000000000..118476af13
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_SWIZZLE_H_
+#define __R300_FRAGPROG_SWIZZLE_H_
+
+#include "radeon_swizzle.h"
+
+extern struct rc_swizzle_caps r300_swizzle_caps;
+
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
+
+#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
new file mode 100644
index 0000000000..38312658d6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "radeon_dataflow.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_program_alu.h"
+#include "radeon_program_tex.h"
+#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
+#include "r500_fragprog.h"
+
+
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
+{
+ struct r300_fragment_program_compiler * c = userdata;
+ callback(data, c->OutputColor[0], RC_MASK_XYZW);
+ callback(data, c->OutputColor[1], RC_MASK_XYZW);
+ callback(data, c->OutputColor[2], RC_MASK_XYZW);
+ callback(data, c->OutputColor[3], RC_MASK_XYZW);
+ callback(data, c->OutputDepth, RC_MASK_W);
+}
+
+static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
+{
+ struct rc_instruction *rci;
+
+ for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
+ struct rc_sub_instruction * inst = &rci->U.I;
+
+ if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
+ continue;
+
+ if (inst->DstReg.WriteMask & RC_MASK_Z) {
+ inst->DstReg.WriteMask = RC_MASK_W;
+ } else {
+ inst->DstReg.WriteMask = 0;
+ continue;
+ }
+
+ switch (inst->Opcode) {
+ case RC_OPCODE_FRC:
+ case RC_OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ break;
+ case RC_OPCODE_ADD:
+ case RC_OPCODE_MAX:
+ case RC_OPCODE_MIN:
+ case RC_OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ break;
+ case RC_OPCODE_CMP:
+ case RC_OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ break;
+ default:
+ // Scalar instructions needn't be reswizzled
+ break;
+ }
+ }
+}
+
+static void debug_program_log(struct r300_fragment_program_compiler* c, const char * where)
+{
+ if (c->Base.Debug) {
+ fprintf(stderr, "Fragment Program: %s\n", where);
+ rc_print_program(&c->Base.Program);
+ }
+}
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+{
+ rewrite_depth_out(c);
+
+ debug_program_log(c, "before compilation");
+
+ /* XXX Ideally this should be done only for r3xx, but since
+ * we don't have branching support for r5xx, we use the emulation
+ * on all chipsets. */
+
+ if(c->Base.is_r500){
+ rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
+ }
+ else{
+ rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
+ }
+ debug_program_log(c, "after emulate loops");
+
+ rc_emulate_branches(&c->Base);
+
+ debug_program_log(c, "after emulate branches");
+
+ if (c->Base.is_r500) {
+ struct radeon_program_transformation transformations[] = {
+ { &r500_transform_IF, 0 },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformDeriv, 0 },
+ { &radeonTransformTrigScale, 0 }
+ };
+ radeonLocalTransform(&c->Base, 4, transformations);
+
+ debug_program_log(c, "after native rewrite part 1");
+
+ c->Base.SwizzleCaps = &r500_swizzle_caps;
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(&c->Base, 2, transformations);
+
+ debug_program_log(c, "after native rewrite part 1");
+
+ c->Base.SwizzleCaps = &r300_swizzle_caps;
+ }
+
+ /* Run the common transformations too.
+ * Remember, lowering comes last! */
+ struct radeon_program_transformation common_transformations[] = {
+ { &radeonTransformTEX, c },
+ };
+ radeonLocalTransform(&c->Base, 1, common_transformations);
+
+ common_transformations[0].function = &radeonTransformALU;
+ radeonLocalTransform(&c->Base, 1, common_transformations);
+
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after native rewrite part 2");
+
+ rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c);
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after deadcode");
+
+ rc_optimize(&c->Base);
+
+ debug_program_log(c, "after dataflow optimize");
+
+ rc_dataflow_swizzles(&c->Base);
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after dataflow passes");
+
+ rc_pair_translate(c);
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after pair translate");
+
+ rc_pair_schedule(c);
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after pair scheduling");
+
+ rc_pair_regalloc(c, c->Base.max_temp_regs);
+
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after register allocation");
+
+ if (c->Base.is_r500) {
+ r500BuildFragmentProgramHwCode(c);
+ } else {
+ r300BuildFragmentProgramHwCode(c);
+ }
+
+ rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+
+ if (c->Base.Debug) {
+ if (c->Base.is_r500) {
+ r500FragmentProgramDump(c->code);
+ } else {
+ r300FragmentProgramDump(c->code);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
new file mode 100644
index 0000000000..507b2e532f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -0,0 +1,651 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+#include "radeon_dataflow.h"
+#include "radeon_program_alu.h"
+#include "radeon_swizzle.h"
+#include "radeon_emulate_branches.h"
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ */
+#define __CONST(x, y) \
+ (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_src_class(vpi->SrcReg[x].File), \
+ RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+
+
+static unsigned long t_dst_mask(unsigned int mask)
+{
+ /* RC_MASK_* is equivalent to VSF_FLAG_* */
+ return mask & RC_MASK_XYZW;
+}
+
+static unsigned long t_dst_class(rc_register_file file)
+{
+ switch (file) {
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_TEMPORARY:
+ return PVS_DST_REG_TEMPORARY;
+ case RC_FILE_OUTPUT:
+ return PVS_DST_REG_OUT;
+ case RC_FILE_ADDRESS:
+ return PVS_DST_REG_A0;
+ }
+}
+
+static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
+ struct rc_dst_register *dst)
+{
+ if (dst->File == RC_FILE_OUTPUT)
+ return vp->outputs[dst->Index];
+
+ return dst->Index;
+}
+
+static unsigned long t_src_class(rc_register_file file)
+{
+ switch (file) {
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_NONE:
+ case RC_FILE_TEMPORARY:
+ return PVS_SRC_REG_TEMPORARY;
+ case RC_FILE_INPUT:
+ return PVS_SRC_REG_INPUT;
+ case RC_FILE_CONSTANT:
+ return PVS_SRC_REG_CONSTANT;
+ }
+}
+
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
+{
+ unsigned long aclass = t_src_class(a.File);
+ unsigned long bclass = t_src_class(b.File);
+
+ if (aclass != bclass)
+ return 0;
+ if (aclass == PVS_SRC_REG_TEMPORARY)
+ return 0;
+
+ if (a.RelAddr || b.RelAddr)
+ return 1;
+ if (a.Index != b.Index)
+ return 1;
+
+ return 0;
+}
+
+static inline unsigned long t_swizzle(unsigned int swizzle)
+{
+ /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ return swizzle;
+}
+
+static unsigned long t_src_index(struct r300_vertex_program_code *vp,
+ struct rc_src_register *src)
+{
+ if (src->File == RC_FILE_INPUT) {
+ assert(vp->inputs[src->Index] != -1);
+ return vp->inputs[src->Index];
+ } else {
+ if (src->Index < 0) {
+ fprintf(stderr,
+ "negative offsets for indirect addressing do not work.\n");
+ return 0;
+ }
+ return src->Index;
+ }
+}
+
+/* these two functions should probably be merged... */
+
+static unsigned long t_src(struct r300_vertex_program_code *vp,
+ struct rc_src_register *src)
+{
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 1)),
+ t_swizzle(GET_SWZ(src->Swizzle, 2)),
+ t_swizzle(GET_SWZ(src->Swizzle, 3)),
+ t_src_class(src->File),
+ src->Negate) | (src->RelAddr << 4);
+}
+
+static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
+ struct rc_src_register *src)
+{
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_src_class(src->File),
+ src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+ (src->RelAddr << 4);
+}
+
+static int valid_dst(struct r300_vertex_program_code *vp,
+ struct rc_dst_register *dst)
+{
+ if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return 0;
+ } else if (dst->File == RC_FILE_ADDRESS) {
+ assert(dst->Index == 0);
+ }
+
+ return 1;
+}
+
+static void ei_vector1(struct r300_vertex_program_code *vp,
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ 0,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+static void ei_vector2(struct r300_vertex_program_code *vp,
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ 0,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
+ inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
+}
+
+static void ei_math1(struct r300_vertex_program_code *vp,
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ 1,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+static void ei_lit(struct r300_vertex_program_code *vp,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+ inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+ 1,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ /* NOTE: Users swizzling might not work. */
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+}
+
+static void ei_mad(struct r300_vertex_program_code *vp,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ /* Remarks about hardware limitations of MAD
+ * (please preserve this comment, as this information is _NOT_
+ * in the documentation provided by AMD).
+ *
+ * As described in the documentation, MAD with three unique temporary
+ * source registers requires the use of the macro version.
+ *
+ * However (and this is not mentioned in the documentation), apparently
+ * the macro version is _NOT_ a full superset of the normal version.
+ * In particular, the macro version does not always work when relative
+ * addressing is used in the source operands.
+ *
+ * This limitation caused incorrect rendering in Sauerbraten's OpenGL
+ * assembly shader path when using medium quality animations
+ * (i.e. animations with matrix blending instead of quaternion blending).
+ *
+ * Unfortunately, I (nha) have been unable to extract a Piglit regression
+ * test for this issue - for some reason, it is possible to have vertex
+ * programs whose prefix is *exactly* the same as the prefix of the
+ * offending program in Sauerbraten up to the offending instruction
+ * without causing any trouble.
+ *
+ * Bottom line: Only use the macro version only when really necessary;
+ * according to AMD docs, this should improve performance by one clock
+ * as a nice side bonus.
+ */
+ if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
+ vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
+ vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
+ inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+ 0,
+ 1,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ } else {
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ 0,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ }
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
+ inst[3] = t_src(vp, &vpi->SrcReg[2]);
+}
+
+static void ei_pow(struct r300_vertex_program_code *vp,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+ 1,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
+}
+
+
+static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+{
+ struct rc_instruction *rci;
+
+ compiler->code->pos_end = 0; /* Not supported yet */
+ compiler->code->length = 0;
+
+ compiler->SetHwInputOutput(compiler);
+
+ for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
+ struct rc_sub_instruction *vpi = &rci->U.I;
+ unsigned int *inst = compiler->code->body.d + compiler->code->length;
+
+ /* Skip instructions writing to non-existing destination */
+ if (!valid_dst(compiler->code, &vpi->DstReg))
+ continue;
+
+ if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+ return;
+ }
+
+ assert(compiler->Base.is_r500 ||
+ (vpi->Opcode != RC_OPCODE_SEQ &&
+ vpi->Opcode != RC_OPCODE_SNE));
+
+ switch (vpi->Opcode) {
+ case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
+ case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+ case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+ case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+ case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+ case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+ case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+ case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+ case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+ case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
+ case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+ default:
+ rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
+ return;
+ }
+
+ compiler->code->length += 4;
+
+ if (compiler->Base.Error)
+ return;
+ }
+}
+
+struct temporary_allocation {
+ unsigned int Allocated:1;
+ unsigned int HwTemp:15;
+ struct rc_instruction * LastRead;
+};
+
+static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
+{
+ struct rc_instruction *inst;
+ unsigned int num_orig_temps = 0;
+ char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ struct temporary_allocation * ta;
+ unsigned int i, j;
+
+ compiler->code->num_temporaries = 0;
+ memset(hwtemps, 0, sizeof(hwtemps));
+
+ /* Pass 1: Count original temporaries and allocate structures */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
+ }
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.DstReg.Index + 1;
+ }
+ }
+ }
+
+ ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+ sizeof(struct temporary_allocation) * num_orig_temps);
+ memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
+
+ /* Pass 2: Determine original temporary lifetimes */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
+ ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
+ }
+ }
+
+ /* Pass 3: Register allocation */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.SrcReg[i].Index;
+ inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+
+ if (ta[orig].Allocated && inst == ta[orig].LastRead)
+ hwtemps[ta[orig].HwTemp] = 0;
+ }
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.DstReg.Index;
+
+ if (!ta[orig].Allocated) {
+ for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+ if (!hwtemps[j])
+ break;
+ }
+ if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+ fprintf(stderr, "Out of hw temporaries\n");
+ } else {
+ ta[orig].Allocated = 1;
+ ta[orig].HwTemp = j;
+ hwtemps[j] = 1;
+
+ if (j >= compiler->code->num_temporaries)
+ compiler->code->num_temporaries = j + 1;
+ }
+ }
+
+ inst->U.I.DstReg.Index = ta[orig].HwTemp;
+ }
+ }
+ }
+}
+
+
+/**
+ * Vertex engine cannot read two inputs or two constants at the same time.
+ * Introduce intermediate MOVs to temporary registers to account for this.
+ */
+static int transform_source_conflicts(
+ struct radeon_compiler *c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->NumSrcRegs == 3) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+ || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+
+ reset_srcreg(&inst->U.I.SrcReg[2]);
+ inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[2].Index = tmpreg;
+ }
+ }
+
+ if (opcode->NumSrcRegs >= 2) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+
+ reset_srcreg(&inst->U.I.SrcReg[1]);
+ inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[1].Index = tmpreg;
+ }
+ }
+
+ return 1;
+}
+
+static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
+{
+ int i;
+
+ for(i = 0; i < 32; ++i) {
+ if ((compiler->RequiredOutputs & (1 << i)) &&
+ !(compiler->Base.Program.OutputsWritten & (1 << i))) {
+ struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = i;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+ inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+ inst->U.I.SrcReg[0].Index = 0;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+ compiler->Base.Program.OutputsWritten |= 1 << i;
+ }
+ }
+}
+
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
+{
+ struct r300_vertex_program_compiler * c = userdata;
+ int i;
+
+ for(i = 0; i < 32; ++i) {
+ if (c->RequiredOutputs & (1 << i))
+ callback(data, i, RC_MASK_XYZW);
+ }
+}
+
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+ (void) opcode;
+ (void) reg;
+
+ return 1;
+}
+
+static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
+{
+ if (c->Base.Debug) {
+ fprintf(stderr, "Vertex Program: %s\n", where);
+ rc_print_program(&c->Base.Program);
+ }
+}
+
+
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+ .IsNative = &swizzle_is_native,
+ .Split = 0 /* should never be called */
+};
+
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
+{
+ compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
+ addArtificialOutputs(compiler);
+
+ debug_program_log(compiler, "before compilation");
+
+ /* XXX Ideally this should be done only for r3xx, but since
+ * we don't have branching support for r5xx, we use the emulation
+ * on all chipsets. */
+ rc_emulate_branches(&compiler->Base);
+
+ debug_program_log(compiler, "after emulate branches");
+
+ {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_vertex_alu, 0 },
+ { &r300_transform_trig_scale_vertex, 0 }
+ };
+ radeonLocalTransform(&compiler->Base, 2, transformations);
+ }
+
+ debug_program_log(compiler, "after native rewrite");
+
+ {
+ /* Note: This pass has to be done seperately from ALU rewrite,
+ * otherwise non-native ALU instructions with source conflits
+ * will not be treated properly.
+ */
+ struct radeon_program_transformation transformations[] = {
+ { &transform_source_conflicts, 0 },
+ };
+ radeonLocalTransform(&compiler->Base, 1, transformations);
+ }
+
+ debug_program_log(compiler, "after source conflict resolve");
+
+ rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+
+ debug_program_log(compiler, "after deadcode");
+
+ rc_dataflow_swizzles(&compiler->Base);
+
+ allocate_temporary_registers(compiler);
+
+ debug_program_log(compiler, "after dataflow");
+
+ translate_vertex_program(compiler);
+
+ rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+
+ compiler->code->InputsRead = compiler->Base.Program.InputsRead;
+ compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Final vertex program code:\n");
+ r300_vertex_program_dump(compiler->code);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
new file mode 100644
index 0000000000..5800f1a78e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_code.h"
+
+#include <stdio.h>
+
+static char* r300_vs_ve_ops[] = {
+ /* R300 vector ops */
+ " VE_NO_OP",
+ " VE_DOT_PRODUCT",
+ " VE_MULTIPLY",
+ " VE_ADD",
+ " VE_MULTIPLY_ADD",
+ " VE_DISTANCE_FACTOR",
+ " VE_FRACTION",
+ " VE_MAXIMUM",
+ " VE_MINIMUM",
+ "VE_SET_GREATER_THAN_EQUAL",
+ " VE_SET_LESS_THAN",
+ " VE_MULTIPLYX2_ADD",
+ " VE_MULTIPLY_CLAMP",
+ " VE_FLT2FIX_DX",
+ " VE_FLT2FIX_DX_RND",
+ /* R500 vector ops */
+ " VE_PRED_SET_EQ_PUSH",
+ " VE_PRED_SET_GT_PUSH",
+ " VE_PRED_SET_GTE_PUSH",
+ " VE_PRED_SET_NEQ_PUSH",
+ " VE_COND_WRITE_EQ",
+ " VE_COND_WRITE_GT",
+ " VE_COND_WRITE_GTE",
+ " VE_COND_WRITE_NEQ",
+ " VE_COND_MUX_EQ",
+ " VE_COND_MUX_GT",
+ " VE_COND_MUX_GTE",
+ " VE_SET_GREATER_THAN",
+ " VE_SET_EQUAL",
+ " VE_SET_NOT_EQUAL",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+static char* r300_vs_me_ops[] = {
+ /* R300 math ops */
+ " ME_NO_OP",
+ " ME_EXP_BASE2_DX",
+ " ME_LOG_BASE2_DX",
+ " ME_EXP_BASEE_FF",
+ " ME_LIGHT_COEFF_DX",
+ " ME_POWER_FUNC_FF",
+ " ME_RECIP_DX",
+ " ME_RECIP_FF",
+ " ME_RECIP_SQRT_DX",
+ " ME_RECIP_SQRT_FF",
+ " ME_MULTIPLY",
+ " ME_EXP_BASE2_FULL_DX",
+ " ME_LOG_BASE2_FULL_DX",
+ " ME_POWER_FUNC_FF_CLAMP_B",
+ "ME_POWER_FUNC_FF_CLAMP_B1",
+ "ME_POWER_FUNC_FF_CLAMP_01",
+ " ME_SIN",
+ " ME_COS",
+ /* R500 math ops */
+ " ME_LOG_BASE2_IEEE",
+ " ME_RECIP_IEEE",
+ " ME_RECIP_SQRT_IEEE",
+ " ME_PRED_SET_EQ",
+ " ME_PRED_SET_GT",
+ " ME_PRED_SET_GTE",
+ " ME_PRED_SET_NEQ",
+ " ME_PRED_SET_CLR",
+ " ME_PRED_SET_INV",
+ " ME_PRED_SET_POP",
+ " ME_PRED_SET_RESTORE",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+static char* r300_vs_src_debug[] = {
+ "t",
+ "i",
+ "c",
+ "a",
+};
+
+static char* r300_vs_dst_debug[] = {
+ "t",
+ "a0",
+ "o",
+ "ox",
+ "a",
+ "i",
+ "u",
+ "u",
+};
+
+static char* r300_vs_swiz_debug[] = {
+ "X",
+ "Y",
+ "Z",
+ "W",
+ "0",
+ "1",
+ "U",
+ "U",
+};
+
+
+static void r300_vs_op_dump(uint32_t op)
+{
+ fprintf(stderr, " dst: %d%s op: ",
+ (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if (op & 0x80) {
+ if (op & 0x1) {
+ fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
+ } else {
+ fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n");
+ }
+ } else if (op & 0x40) {
+ fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]);
+ } else {
+ fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]);
+ }
+}
+
+static void r300_vs_src_dump(uint32_t src)
+{
+ fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
+ (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3],
+ src & (1 << 25) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 13) & 0x7],
+ src & (1 << 26) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 16) & 0x7],
+ src & (1 << 27) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 19) & 0x7],
+ src & (1 << 28) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 22) & 0x7]);
+}
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+{
+ unsigned instrcount = vs->length / 4;
+ unsigned i;
+
+ for(i = 0; i < instrcount; i++) {
+ unsigned offset = i*4;
+ unsigned src;
+
+ fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]);
+ r300_vs_op_dump(vs->body.d[offset]);
+
+ for(src = 0; src < 3; ++src) {
+ fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]);
+ r300_vs_src_dump(vs->body.d[offset+1+src]);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
new file mode 100644
index 0000000000..632f0bcf4f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+/**
+ * Rewrite IF instructions to use the ALU result special register.
+ */
+int r500_transform_IF(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_IF)
+ return 0;
+
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.WriteMask = 0;
+ inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
+ inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
+
+ inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+ inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].Negate = 0;
+
+ return 1;
+}
+
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+ unsigned int relevant;
+ int i;
+
+ if (opcode == RC_OPCODE_TEX ||
+ opcode == RC_OPCODE_TXB ||
+ opcode == RC_OPCODE_TXP ||
+ opcode == RC_OPCODE_KIL) {
+ if (reg.Abs)
+ return 0;
+
+ if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
+ return 0;
+
+ if (reg.Negate)
+ reg.Negate ^= RC_MASK_XYZW;
+
+ for(i = 0; i < 4; ++i) {
+ unsigned int swz = GET_SWZ(reg.Swizzle, i);
+ if (swz == RC_SWIZZLE_UNUSED) {
+ reg.Negate &= ~(1 << i);
+ continue;
+ }
+ if (swz >= 4)
+ return 0;
+ }
+
+ if (reg.Negate)
+ return 0;
+
+ return 1;
+ } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {
+ /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
+ * if it doesn't fit perfectly into a .xyzw case... */
+ if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
+ return 1;
+
+ return 0;
+ } else {
+ /* ALU instructions support almost everything */
+ if (reg.Abs)
+ return 1;
+
+ relevant = 0;
+ for(i = 0; i < 3; ++i) {
+ unsigned int swz = GET_SWZ(reg.Swizzle, i);
+ if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
+ relevant |= 1 << i;
+ }
+ if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+ return 0;
+
+ return 1;
+ }
+}
+
+/**
+ * Split source register access.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation.
+ */
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+ struct rc_swizzle_split * split)
+{
+ unsigned int negatebase[2] = { 0, 0 };
+ int i;
+
+ for(i = 0; i < 4; ++i) {
+ unsigned int swz = GET_SWZ(src.Swizzle, i);
+ if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
+ continue;
+ negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
+ }
+
+ split->NumPhases = 0;
+
+ for(i = 0; i <= 1; ++i) {
+ if (!negatebase[i])
+ continue;
+
+ split->Phase[split->NumPhases++] = negatebase[i];
+ }
+}
+
+struct rc_swizzle_caps r500_swizzle_caps = {
+ .IsNative = r500_swizzle_is_native,
+ .Split = r500_swizzle_split
+};
+
+static char *toswiz(int swiz_val) {
+ switch(swiz_val) {
+ case 0: return "R";
+ case 1: return "G";
+ case 2: return "B";
+ case 3: return "A";
+ case 4: return "0";
+ case 5: return "H";
+ case 6: return "1";
+ case 7: return "U";
+ }
+ return NULL;
+}
+
+static char *toop(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP3"; break;
+ case 2: str = "DP4"; break;
+ case 3: str = "D2A"; break;
+ case 4: str = "MIN"; break;
+ case 5: str = "MAX"; break;
+ case 6: str = "Reserved"; break;
+ case 7: str = "CND"; break;
+ case 8: str = "CMP"; break;
+ case 9: str = "FRC"; break;
+ case 10: str = "SOP"; break;
+ case 11: str = "MDH"; break;
+ case 12: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_alpha_op(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP"; break;
+ case 2: str = "MIN"; break;
+ case 3: str = "MAX"; break;
+ case 4: str = "Reserved"; break;
+ case 5: str = "CND"; break;
+ case 6: str = "CMP"; break;
+ case 7: str = "FRC"; break;
+ case 8: str = "EX2"; break;
+ case 9: str = "LN2"; break;
+ case 10: str = "RCP"; break;
+ case 11: str = "RSQ"; break;
+ case 12: str = "SIN"; break;
+ case 13: str = "COS"; break;
+ case 14: str = "MDH"; break;
+ case 15: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_mask(int val)
+{
+ char *str = NULL;
+ switch(val) {
+ case 0: str = "NONE"; break;
+ case 1: str = "R"; break;
+ case 2: str = "G"; break;
+ case 3: str = "RG"; break;
+ case 4: str = "B"; break;
+ case 5: str = "RB"; break;
+ case 6: str = "GB"; break;
+ case 7: str = "RGB"; break;
+ case 8: str = "A"; break;
+ case 9: str = "AR"; break;
+ case 10: str = "AG"; break;
+ case 11: str = "ARG"; break;
+ case 12: str = "AB"; break;
+ case 13: str = "ARB"; break;
+ case 14: str = "AGB"; break;
+ case 15: str = "ARGB"; break;
+ }
+ return str;
+}
+
+static char *to_texop(int val)
+{
+ switch(val) {
+ case 0: return "NOP";
+ case 1: return "LD";
+ case 2: return "TEXKILL";
+ case 3: return "PROJ";
+ case 4: return "LODBIAS";
+ case 5: return "LOD";
+ case 6: return "DXDY";
+ }
+ return NULL;
+}
+
+void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+ struct r500_fragment_program_code *code = &c->code.r500;
+ fprintf(stderr, "R500 Fragment Program:\n--------\n");
+
+ int n;
+ uint32_t inst;
+ uint32_t inst0;
+ char *str = NULL;
+
+ for (n = 0; n < code->inst_end+1; n++) {
+ inst0 = inst = code->inst[n].inst0;
+ fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
+ switch(inst & 0x3) {
+ case R500_INST_TYPE_ALU: str = "ALU"; break;
+ case R500_INST_TYPE_OUT: str = "OUT"; break;
+ case R500_INST_TYPE_FC: str = "FC"; break;
+ case R500_INST_TYPE_TEX: str = "TEX"; break;
+ };
+ fprintf(stderr,"%s %s %s %s %s ", str,
+ inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
+ inst & R500_INST_LAST ? "LAST" : "",
+ inst & R500_INST_NOP ? "NOP" : "",
+ inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
+ fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
+ to_mask((inst >> 15) & 0xf));
+
+ switch(inst0 & 0x3) {
+ case 0:
+ case 1:
+ fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
+ inst = code->inst[n].inst1;
+
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+
+ fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+ fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
+ inst = code->inst[n].inst3;
+ fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
+ (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
+ (inst >> 11) & 0x3,
+ (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
+ (inst >> 24) & 0x3, (inst >> 29) & 0x3);
+
+
+ fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
+ inst = code->inst[n].inst4;
+ fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
+ (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+ (inst >> 29) & 0x3,
+ (inst >> 31) & 0x1);
+
+ fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
+ inst = code->inst[n].inst5;
+ fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
+ (inst >> 23) & 0x3,
+ (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
+ break;
+ case 2:
+ break;
+ case 3:
+ inst = code->inst[n].inst1;
+ fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
+ to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
+ (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
+ inst & 127, inst & (1<<7) ? "(rel)" : "",
+ toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
+ toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
+ (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
+ toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
+ toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
+
+ fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
+ break;
+ }
+ fprintf(stderr,"\n");
+ }
+
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
new file mode 100644
index 0000000000..4efbae7ba6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R500_FRAGPROG_H_
+#define __R500_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+
+extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
+
+extern struct rc_swizzle_caps r500_swizzle_caps;
+
+extern int r500_transform_IF(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
new file mode 100644
index 0000000000..fb2d8b5a9c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \author Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+
+
+#define PROG_CODE \
+ struct r500_fragment_program_code *code = &c->code->code.r500
+
+#define error(fmt, args...) do { \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ } while(0)
+
+
+struct branch_info {
+ int If;
+ int Else;
+ int Endif;
+};
+
+struct emit_state {
+ struct radeon_compiler * C;
+ struct r500_fragment_program_code * Code;
+
+ struct branch_info * Branches;
+ unsigned int CurrentBranchDepth;
+ unsigned int BranchesReserved;
+
+ unsigned int MaxBranchDepth;
+};
+
+static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+ case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+ case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+ case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+ default:
+ error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+ /* fall through */
+ case RC_OPCODE_NOP:
+ /* fall through */
+ case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+ case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+ }
+}
+
+static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
+ case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
+ case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
+ case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
+ case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
+ default:
+ error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+ /* fall through */
+ case RC_OPCODE_NOP:
+ /* fall through */
+ case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
+ case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
+ case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+ case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
+ }
+}
+
+static unsigned int fix_hw_swizzle(unsigned int swz)
+{
+ switch (swz) {
+ case RC_SWIZZLE_ZERO:
+ case RC_SWIZZLE_UNUSED:
+ swz = 4;
+ break;
+ case RC_SWIZZLE_HALF:
+ swz = 5;
+ break;
+ case RC_SWIZZLE_ONE:
+ swz = 6;
+ break;
+ }
+
+ return swz;
+}
+
+static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
+{
+ unsigned int t = inst->RGB.Arg[arg].Source;
+ int comp;
+ t |= inst->RGB.Arg[arg].Negate << 11;
+ t |= inst->RGB.Arg[arg].Abs << 12;
+
+ for(comp = 0; comp < 3; ++comp)
+ t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
+
+ return t;
+}
+
+static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
+{
+ unsigned int t = inst->Alpha.Arg[i].Source;
+ t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
+ t |= inst->Alpha.Arg[i].Negate << 5;
+ t |= inst->Alpha.Arg[i].Abs << 6;
+ return t;
+}
+
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+ switch(func) {
+ case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
+ case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
+ case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
+ case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
+ default:
+ rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+ return 0;
+ }
+}
+
+static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
+{
+ if (index > code->max_temp_idx)
+ code->max_temp_idx = index;
+}
+
+static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+ if (src.File == RC_FILE_CONSTANT) {
+ return src.Index | 0x100;
+ } else if (src.File == RC_FILE_TEMPORARY) {
+ use_temporary(code, src.Index);
+ return src.Index;
+ }
+
+ return 0;
+}
+
+/**
+ * NOP the specified instruction if it is not a texture lookup.
+ */
+static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
+{
+ PROG_CODE;
+
+ if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
+ code->inst[ip].inst0 |= R500_INST_NOP;
+ }
+}
+
+/**
+ * Emit a paired ALU instruction.
+ */
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
+{
+ PROG_CODE;
+
+ if (code->inst_end >= 511) {
+ error("emit_alu: Too many instructions");
+ return;
+ }
+
+ int ip = ++code->inst_end;
+
+ /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
+ if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
+ inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
+ if (ip > 0) {
+ alu_nop(c, ip - 1);
+ }
+ }
+
+ code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
+ code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
+
+ if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
+ code->inst[ip].inst0 = R500_INST_TYPE_OUT;
+ if (inst->WriteALUResult) {
+ error("%s: cannot write output and ALU result at the same time");
+ return;
+ }
+ } else {
+ code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+ }
+ code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
+
+ code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
+ code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+ if (inst->Alpha.DepthWriteMask) {
+ code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
+ c->code->writes_depth = 1;
+ }
+
+ code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
+ code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
+ use_temporary(code, inst->Alpha.DestIndex);
+ use_temporary(code, inst->RGB.DestIndex);
+
+ if (inst->RGB.Saturate)
+ code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
+ if (inst->Alpha.Saturate)
+ code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
+
+ code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
+ code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
+ code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
+
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
+
+ code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
+ code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
+ code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
+
+ code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
+ code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
+ code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
+
+ code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
+ code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
+
+ if (inst->WriteALUResult) {
+ code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+ else
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+ code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+ }
+}
+
+static unsigned int translate_strq_swizzle(unsigned int swizzle)
+{
+ unsigned int swiz = 0;
+ int i;
+ for (i = 0; i < 4; i++)
+ swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
+ return swiz;
+}
+
+/**
+ * Emit a single TEX instruction
+ */
+static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
+{
+ PROG_CODE;
+
+ if (code->inst_end >= 511) {
+ error("emit_tex: Too many instructions");
+ return 0;
+ }
+
+ int ip = ++code->inst_end;
+
+ code->inst[ip].inst0 = R500_INST_TYPE_TEX
+ | (inst->DstReg.WriteMask << 11)
+ | R500_INST_TEX_SEM_WAIT;
+ code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
+ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
+
+ if (inst->TexSrcTarget == RC_TEXTURE_RECT)
+ code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+
+ switch (inst->Opcode) {
+ case RC_OPCODE_KIL:
+ code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
+ break;
+ case RC_OPCODE_TEX:
+ code->inst[ip].inst1 |= R500_TEX_INST_LD;
+ break;
+ case RC_OPCODE_TXB:
+ code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
+ break;
+ case RC_OPCODE_TXP:
+ code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
+ break;
+ default:
+ error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
+ }
+
+ use_temporary(code, inst->SrcReg[0].Index);
+ if (inst->Opcode != RC_OPCODE_KIL)
+ use_temporary(code, inst->DstReg.Index);
+
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+ | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
+ | R500_TEX_DST_ADDR(inst->DstReg.Index)
+ | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
+ | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
+
+ return 1;
+}
+
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+ if (s->Code->inst_end >= 511) {
+ rc_error(s->C, "emit_tex: Too many instructions");
+ return;
+ }
+
+ unsigned int newip = ++s->Code->inst_end;
+
+ s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ if (s->CurrentBranchDepth >= 32) {
+ rc_error(s->C, "Branch depth exceeds hardware limit");
+ return;
+ }
+
+ memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+ s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
+ branch->If = newip;
+ branch->Else = -1;
+ branch->Endif = -1;
+
+ if (s->CurrentBranchDepth > s->MaxBranchDepth)
+ s->MaxBranchDepth = s->CurrentBranchDepth;
+
+ /* actual instruction is filled in at ENDIF time */
+ } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
+ if (!s->CurrentBranchDepth) {
+ rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+ return;
+ }
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+ branch->Else = newip;
+
+ /* actual instruction is filled in at ENDIF time */
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ if (!s->CurrentBranchDepth) {
+ rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+ return;
+ }
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+ branch->Endif = newip;
+
+ s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+ | R500_FC_B_OP0_INCR /* increment branch counter if stay */
+ ;
+
+ if (branch->Else >= 0) {
+ /* increment branch counter also if jump */
+ s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+ s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+ s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_B_ELSE /* all active pixels want to jump */
+ | R500_FC_B_OP0_NONE /* no counter op if stay */
+ | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+ | R500_FC_B_POP_CNT(1)
+ ;
+ s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+ } else {
+ /* don't touch branch counter on jump */
+ s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+ s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+ }
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
+ ;
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+
+ s->CurrentBranchDepth--;
+ } else {
+ rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
+ }
+}
+
+void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+{
+ struct emit_state s;
+ struct r500_fragment_program_code *code = &compiler->code->code.r500;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &compiler->Base;
+ s.Code = code;
+
+ memset(code, 0, sizeof(*code));
+ code->max_temp_idx = 1;
+ code->inst_end = -1;
+
+ for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->IsFlowControl) {
+ emit_flowcontrol(&s, inst);
+ } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+ continue;
+ } else {
+ emit_tex(compiler, &inst->U.I);
+ }
+ } else {
+ emit_paired(compiler, &inst->U.P);
+ }
+ }
+
+ if (code->max_temp_idx >= 128)
+ rc_error(&compiler->Base, "Too many hardware temporaries used");
+
+ if (compiler->Base.Error)
+ return;
+
+ if (code->inst_end == -1 ||
+ (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+ /* This may happen when dead-code elimination is disabled or
+ * when most of the fragment program logic is leading to a KIL */
+ if (code->inst_end >= 511) {
+ rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
+ return;
+ }
+
+ int ip = ++code->inst_end;
+ code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
+ }
+
+ if (s.MaxBranchDepth >= 4) {
+ if (code->max_temp_idx < 1)
+ code->max_temp_idx = 1;
+
+ code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
new file mode 100644
index 0000000000..0eab18c344
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_code.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "radeon_program.h"
+
+void rc_constants_init(struct rc_constant_list * c)
+{
+ memset(c, 0, sizeof(*c));
+}
+
+/**
+ * Copy a constants structure, assuming that the destination structure
+ * is not initialized.
+ */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
+{
+ dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
+ memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
+ dst->Count = src->Count;
+ dst->_Reserved = src->Count;
+}
+
+void rc_constants_destroy(struct rc_constant_list * c)
+{
+ free(c->Constants);
+ memset(c, 0, sizeof(*c));
+}
+
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
+{
+ unsigned index = c->Count;
+
+ if (c->Count >= c->_Reserved) {
+ struct rc_constant * newlist;
+
+ c->_Reserved = c->_Reserved * 2;
+ if (!c->_Reserved)
+ c->_Reserved = 16;
+
+ newlist = malloc(sizeof(struct rc_constant) * c->_Reserved);
+ memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count);
+
+ free(c->Constants);
+ c->Constants = newlist;
+ }
+
+ c->Constants[index] = *constant;
+ c->Count++;
+
+ return index;
+}
+
+
+/**
+ * Add a state vector to the constant list, while trying to avoid duplicates.
+ */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_STATE) {
+ if (c->Constants[index].u.State[0] == state0 &&
+ c->Constants[index].u.State[1] == state1)
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_STATE;
+ constant.Size = 4;
+ constant.u.State[0] = state0;
+ constant.u.State[1] = state1;
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate vector to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 4;
+ memcpy(constant.u.Immediate, data, sizeof(float) * 4);
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate scalar to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
+{
+ unsigned index;
+ int free_index = -1;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ unsigned comp;
+ for(comp = 0; comp < c->Constants[index].Size; ++comp) {
+ if (c->Constants[index].u.Immediate[comp] == data) {
+ *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
+ return index;
+ }
+ }
+
+ if (c->Constants[index].Size < 4)
+ free_index = index;
+ }
+ }
+
+ if (free_index >= 0) {
+ unsigned comp = c->Constants[free_index].Size++;
+ c->Constants[free_index].u.Immediate[comp] = data;
+ *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
+ return free_index;
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 1;
+ constant.u.Immediate[0] = data;
+ *swizzle = RC_SWIZZLE_XXXX;
+
+ return rc_constants_add(c, &constant);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
new file mode 100644
index 0000000000..1979e7e4e4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_CODE_H
+#define RADEON_CODE_H
+
+#include <stdint.h>
+
+#define R300_PFS_MAX_ALU_INST 64
+#define R300_PFS_MAX_TEX_INST 32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS 32
+#define R300_PFS_NUM_CONST_REGS 32
+
+#define R500_PFS_MAX_INST 512
+#define R500_PFS_NUM_TEMP_REGS 128
+#define R500_PFS_NUM_CONST_REGS 256
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+
+enum {
+ /**
+ * External constants are constants whose meaning is unknown to this
+ * compiler. For example, a Mesa gl_program's constants are turned
+ * into external constants.
+ */
+ RC_CONSTANT_EXTERNAL = 0,
+
+ RC_CONSTANT_IMMEDIATE,
+
+ /**
+ * Constant referring to state that is known by this compiler,
+ * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
+ */
+ RC_CONSTANT_STATE
+};
+
+enum {
+ RC_STATE_SHADOW_AMBIENT = 0,
+
+ RC_STATE_R300_WINDOW_DIMENSION,
+ RC_STATE_R300_TEXRECT_FACTOR,
+ RC_STATE_R300_VIEWPORT_SCALE,
+ RC_STATE_R300_VIEWPORT_OFFSET
+};
+
+struct rc_constant {
+ unsigned Type:2; /**< RC_CONSTANT_xxx */
+ unsigned Size:3;
+
+ union {
+ unsigned External;
+ float Immediate[4];
+ unsigned State[2];
+ } u;
+};
+
+struct rc_constant_list {
+ struct rc_constant * Constants;
+ unsigned Count;
+
+ unsigned _Reserved;
+};
+
+void rc_constants_init(struct rc_constant_list * c);
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
+void rc_constants_destroy(struct rc_constant_list * c);
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
+
+/**
+ * Compare functions.
+ *
+ * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
+ * the correct GL compare function.
+ */
+typedef enum {
+ RC_COMPARE_FUNC_NEVER = 0,
+ RC_COMPARE_FUNC_LESS,
+ RC_COMPARE_FUNC_EQUAL,
+ RC_COMPARE_FUNC_LEQUAL,
+ RC_COMPARE_FUNC_GREATER,
+ RC_COMPARE_FUNC_NOTEQUAL,
+ RC_COMPARE_FUNC_GEQUAL,
+ RC_COMPARE_FUNC_ALWAYS
+} rc_compare_func;
+
+/**
+ * Coordinate wrapping modes.
+ *
+ * These are not quite the same as their GL counterparts yet.
+ */
+typedef enum {
+ RC_WRAP_NONE = 0,
+ RC_WRAP_REPEAT,
+ RC_WRAP_MIRRORED_REPEAT,
+ RC_WRAP_MIRRORED_CLAMP
+} rc_wrap_mode;
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+ struct {
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is:
+ * 0 - GL_LUMINANCE
+ * 1 - GL_INTENSITY
+ * 2 - GL_ALPHA
+ * depending on the depth texture mode.
+ */
+ unsigned depth_texture_swizzle:12;
+
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field specifies the compare function.
+ *
+ * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
+ * \sa rc_compare_func
+ */
+ unsigned texture_compare_func : 3;
+
+ /**
+ * No matter what the sampler type is,
+ * this field turns it into a shadow sampler.
+ */
+ unsigned compare_mode_enabled : 1;
+
+ /**
+ * If the sampler needs to fake NPOT, this field is set.
+ */
+ unsigned fake_npot : 1;
+
+ /**
+ * If the sampler will recieve non-normalized coords,
+ * this field is set.
+ */
+ unsigned non_normalized_coords : 1;
+
+ /**
+ * This field specifies wrapping modes for the sampler.
+ *
+ * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
+ * will be performed on the coordinates.
+ */
+ unsigned wrap_mode : 2;
+ } unit[16];
+};
+
+
+
+struct r300_fragment_program_node {
+ int tex_offset; /**< first tex instruction */
+ int tex_end; /**< last tex instruction, relative to tex_offset */
+ int alu_offset; /**< first ALU instruction */
+ int alu_end; /**< last ALU instruction, relative to alu_offset */
+ int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+ struct {
+ int length; /**< total # of texture instructions used */
+ uint32_t inst[R300_PFS_MAX_TEX_INST];
+ } tex;
+
+ struct {
+ int length; /**< total # of ALU instructions used */
+ struct {
+ uint32_t rgb_inst;
+ uint32_t rgb_addr;
+ uint32_t alpha_inst;
+ uint32_t alpha_addr;
+ } inst[R300_PFS_MAX_ALU_INST];
+ } alu;
+
+ uint32_t config; /* US_CONFIG */
+ uint32_t pixsize; /* US_PIXSIZE */
+ uint32_t code_offset; /* US_CODE_OFFSET */
+ uint32_t code_addr[4]; /* US_CODE_ADDR */
+};
+
+
+struct r500_fragment_program_code {
+ struct {
+ uint32_t inst0;
+ uint32_t inst1;
+ uint32_t inst2;
+ uint32_t inst3;
+ uint32_t inst4;
+ uint32_t inst5;
+ } inst[R500_PFS_MAX_INST];
+
+ int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
+
+ int max_temp_idx;
+
+ uint32_t us_fc_ctrl;
+};
+
+struct rX00_fragment_program_code {
+ union {
+ struct r300_fragment_program_code r300;
+ struct r500_fragment_program_code r500;
+ } code;
+
+ unsigned writes_depth:1;
+
+ struct rc_constant_list constants;
+};
+
+
+#define VSF_MAX_FRAGMENT_LENGTH (255*4)
+#define VSF_MAX_FRAGMENT_TEMPS (14)
+
+#define VSF_MAX_INPUTS 32
+#define VSF_MAX_OUTPUTS 32
+
+struct r300_vertex_program_code {
+ int length;
+ union {
+ uint32_t d[VSF_MAX_FRAGMENT_LENGTH];
+ float f[VSF_MAX_FRAGMENT_LENGTH];
+ } body;
+
+ int pos_end;
+ int num_temporaries; /* Number of temp vars used by program */
+ int inputs[VSF_MAX_INPUTS];
+ int outputs[VSF_MAX_OUTPUTS];
+
+ struct rc_constant_list constants;
+
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+};
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+
+#endif /* RADEON_CODE_H */
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
new file mode 100644
index 0000000000..1c8ba864a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "radeon_program.h"
+
+
+void rc_init(struct radeon_compiler * c)
+{
+ memset(c, 0, sizeof(*c));
+
+ memory_pool_init(&c->Pool);
+ c->Program.Instructions.Prev = &c->Program.Instructions;
+ c->Program.Instructions.Next = &c->Program.Instructions;
+ c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+}
+
+void rc_destroy(struct radeon_compiler * c)
+{
+ rc_constants_destroy(&c->Program.Constants);
+ memory_pool_destroy(&c->Pool);
+ free(c->ErrorMsg);
+}
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ if (!c->Debug)
+ return;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+
+void rc_error(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ c->Error = 1;
+
+ if (!c->ErrorMsg) {
+ /* Only remember the first error */
+ char buf[1024];
+ int written;
+
+ va_start(ap, fmt);
+ written = vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ if (written < sizeof(buf)) {
+ c->ErrorMsg = strdup(buf);
+ } else {
+ c->ErrorMsg = malloc(written + 1);
+
+ va_start(ap, fmt);
+ vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
+ va_end(ap);
+ }
+ }
+
+ if (c->Debug) {
+ fprintf(stderr, "r300compiler error: ");
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+}
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+ rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
+ return 1;
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+ struct rc_instruction *inst;
+
+ c->Program.InputsRead = 0;
+ c->Program.OutputsWritten = 0;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+ {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ int i;
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
+ c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
+ c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
+ }
+ }
+}
+
+/**
+ * Rewrite the program such that everything that source the given input
+ * register will source new_input instead.
+ */
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
+{
+ struct rc_instruction * inst;
+
+ c->Program.InputsRead &= ~(1 << input);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
+ inst->U.I.SrcReg[i].File = new_input.File;
+ inst->U.I.SrcReg[i].Index = new_input.Index;
+ inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
+ if (!inst->U.I.SrcReg[i].Abs) {
+ inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
+ inst->U.I.SrcReg[i].Abs = new_input.Abs;
+ }
+
+ c->Program.InputsRead |= 1 << new_input.Index;
+ }
+ }
+ }
+}
+
+
+/**
+ * Rewrite the program such that everything that writes into the given
+ * output register will instead write to new_output. The new_output
+ * writemask is honoured.
+ */
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
+{
+ struct rc_instruction * inst;
+
+ c->Program.OutputsWritten &= ~(1 << output);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+ inst->U.I.DstReg.Index = new_output;
+ inst->U.I.DstReg.WriteMask &= writemask;
+
+ c->Program.OutputsWritten |= 1 << new_output;
+ }
+ }
+ }
+}
+
+
+/**
+ * Rewrite the program such that a given output is duplicated.
+ */
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
+{
+ unsigned tempreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = tempreg;
+ }
+ }
+ }
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = output;
+
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = tempreg;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = dup_output;
+
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = tempreg;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+ c->Program.OutputsWritten |= 1 << dup_output;
+}
+
+
+/**
+ * Introduce standard code fragment to deal with fragment.position.
+ */
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+ int full_vtransform)
+{
+ unsigned tempregi = rc_find_free_temporary(c);
+ struct rc_instruction * inst_rcp;
+ struct rc_instruction * inst_mul;
+ struct rc_instruction * inst_mad;
+ struct rc_instruction * inst;
+
+ c->Program.InputsRead &= ~(1 << wpos);
+ c->Program.InputsRead |= 1 << new_input;
+
+ /* perspective divide */
+ inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = tempregi;
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+
+ inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst_rcp->U.I.SrcReg[0].Index = new_input;
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+ inst_mul = rc_insert_new_instruction(c, inst_rcp);
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = tempregi;
+ inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+ inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst_mul->U.I.SrcReg[0].Index = new_input;
+
+ inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.SrcReg[1].Index = tempregi;
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+ /* viewport transformation */
+ inst_mad = rc_insert_new_instruction(c, inst_mul);
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = tempregi;
+ inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+ inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[0].Index = tempregi;
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+
+ inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
+
+ if (full_vtransform) {
+ inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
+ inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
+ } else {
+ inst_mad->U.I.SrcReg[1].Index =
+ inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+ }
+
+ for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+ inst->U.I.SrcReg[i].Index == wpos) {
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = tempregi;
+ }
+ }
+ }
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
new file mode 100644
index 0000000000..f15905d79d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_COMPILER_H
+#define RADEON_COMPILER_H
+
+#include "../../../../main/compiler.h"
+
+#include "memory_pool.h"
+#include "radeon_code.h"
+#include "radeon_program.h"
+
+struct rc_swizzle_caps;
+
+struct radeon_compiler {
+ struct memory_pool Pool;
+ struct rc_program Program;
+ unsigned Debug:1;
+ unsigned Error:1;
+ char * ErrorMsg;
+
+ /* Hardware specification. */
+ unsigned is_r500;
+ unsigned max_temp_regs;
+
+ /**
+ * Variables used internally, not be touched by callers
+ * of the compiler
+ */
+ /*@{*/
+ struct rc_swizzle_caps * SwizzleCaps;
+ /*@}*/
+};
+
+void rc_init(struct radeon_compiler * c);
+void rc_destroy(struct radeon_compiler * c);
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
+void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged and the if-clause is run.
+ *
+ * A typical use-case would be:
+ *
+ * if (rc_assert(c, condition-that-must-be-true))
+ * return;
+ */
+#define rc_assert(c, cond) \
+ (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
+
+void rc_calculate_inputs_outputs(struct radeon_compiler * c);
+
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+ int full_vtransform);
+
+struct r300_fragment_program_compiler {
+ struct radeon_compiler Base;
+ struct rX00_fragment_program_code *code;
+ /* Optional transformations and features. */
+ struct r300_fragment_program_external_state state;
+ unsigned enable_shadow_ambient;
+ /* Register corresponding to the depthbuffer. */
+ unsigned OutputDepth;
+ /* Registers corresponding to the four colorbuffers. */
+ unsigned OutputColor[4];
+
+ void * UserData;
+ void (*AllocateHwInputs)(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata);
+};
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+
+struct r300_vertex_program_compiler {
+ struct radeon_compiler Base;
+ struct r300_vertex_program_code *code;
+ uint32_t RequiredOutputs;
+
+ void * UserData;
+ void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+
+#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
new file mode 100644
index 0000000000..0e6c62541f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_program.h"
+
+
+static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct rc_sub_instruction * inst = &fullinst->U.I;
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ unsigned int refmask = 0;
+
+ if (inst->SrcReg[src].File == RC_FILE_NONE)
+ return;
+
+ for(unsigned int chan = 0; chan < 4; ++chan)
+ refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
+
+ refmask &= RC_MASK_XYZW;
+
+ if (refmask)
+ cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
+
+ if (refmask && inst->SrcReg[src].RelAddr)
+ cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
+ }
+}
+
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ unsigned int refmasks[3] = { 0, 0, 0 };
+
+ if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ for(unsigned int chan = 0; chan < 3; ++chan) {
+ unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
+ if (swz < 4)
+ refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz;
+ }
+ }
+ }
+
+ if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ if (inst->Alpha.Arg[arg].Swizzle < 4)
+ refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle;
+ }
+ }
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
+ cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
+ refmasks[src] & RC_MASK_XYZ);
+
+ if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
+ cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
+ }
+}
+
+/**
+ * Calls a callback function for all register reads.
+ *
+ * This is conservative, i.e. if the same register is referenced multiple times,
+ * the callback may also be called multiple times.
+ * Also, the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ reads_normal(inst, cb, userdata);
+ } else {
+ reads_pair(inst, cb, userdata);
+ }
+}
+
+
+
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+ struct rc_sub_instruction * inst = &fullinst->U.I;
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ if (opcode->HasDstReg && inst->DstReg.WriteMask)
+ cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);
+
+ if (inst->WriteALUResult)
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+
+ if (inst->RGB.WriteMask)
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);
+
+ if (inst->Alpha.WriteMask)
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);
+
+ if (inst->WriteALUResult)
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+/**
+ * Calls a callback function for all register writes in the instruction,
+ * reporting writemasks to the callback function.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ writes_normal(inst, cb, userdata);
+ } else {
+ writes_pair(inst, cb, userdata);
+ }
+}
+
+
+struct mask_to_chan_data {
+ void * UserData;
+ rc_read_write_chan_fn Fn;
+};
+
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct mask_to_chan_data * d = data;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(mask, chan))
+ d->Fn(d->UserData, inst, file, index, chan);
+ }
+}
+
+/**
+ * Calls a callback function for all sourced register channels.
+ *
+ * This is conservative, i.e. channels may be called multiple times,
+ * and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct mask_to_chan_data d;
+ d.UserData = userdata;
+ d.Fn = cb;
+ rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d);
+}
+
+/**
+ * Calls a callback function for all written register channels.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct mask_to_chan_data d;
+ d.UserData = userdata;
+ d.Fn = cb;
+ rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d);
+}
+
+static void remap_normal_instruction(struct rc_instruction * fullinst,
+ rc_remap_register_fn cb, void * userdata)
+{
+ struct rc_sub_instruction * inst = &fullinst->U.I;
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ if (opcode->HasDstReg) {
+ rc_register_file file = inst->DstReg.File;
+ unsigned int index = inst->DstReg.Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->DstReg.File = file;
+ inst->DstReg.Index = index;
+ }
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ rc_register_file file = inst->SrcReg[src].File;
+ unsigned int index = inst->SrcReg[src].Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->SrcReg[src].File = file;
+ inst->SrcReg[src].Index = index;
+ }
+}
+
+static void remap_pair_instruction(struct rc_instruction * fullinst,
+ rc_remap_register_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+
+ if (inst->RGB.WriteMask) {
+ rc_register_file file = RC_FILE_TEMPORARY;
+ unsigned int index = inst->RGB.DestIndex;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->RGB.DestIndex = index;
+ }
+
+ if (inst->Alpha.WriteMask) {
+ rc_register_file file = RC_FILE_TEMPORARY;
+ unsigned int index = inst->Alpha.DestIndex;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->Alpha.DestIndex = index;
+ }
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used) {
+ rc_register_file file = inst->RGB.Src[src].File;
+ unsigned int index = inst->RGB.Src[src].Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->RGB.Src[src].File = file;
+ inst->RGB.Src[src].Index = index;
+ }
+
+ if (inst->Alpha.Src[src].Used) {
+ rc_register_file file = inst->Alpha.Src[src].File;
+ unsigned int index = inst->Alpha.Src[src].Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->Alpha.Src[src].File = file;
+ inst->Alpha.Src[src].Index = index;
+ }
+ }
+}
+
+
+/**
+ * Remap all register accesses according to the given function.
+ * That is, call the function \p cb for each referenced register (both read and written)
+ * and update the given instruction \p inst accordingly
+ * if it modifies its \ref pfile and \ref pindex contents.
+ */
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL)
+ remap_normal_instruction(inst, cb, userdata);
+ else
+ remap_pair_instruction(inst, cb, userdata);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
new file mode 100644
index 0000000000..60a6e192a9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+
+
+/**
+ * Help analyze and modify the register accesses of instructions.
+ */
+/*@{*/
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+
+typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file * pfile, unsigned int * pindex);
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
+/*@}*/
+
+
+/**
+ * Compiler passes based on dataflow analysis.
+ */
+/*@{*/
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+ void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata);
+void rc_dataflow_swizzles(struct radeon_compiler * c);
+/*@}*/
+
+void rc_optimize(struct radeon_compiler * c);
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
new file mode 100644
index 0000000000..e3c2c83c0c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+struct updatemask_state {
+ unsigned char Output[RC_REGISTER_MAX_INDEX];
+ unsigned char Temporary[RC_REGISTER_MAX_INDEX];
+ unsigned char Address;
+ unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
+};
+
+struct instruction_state {
+ unsigned char WriteMask:4;
+ unsigned char WriteALUResult:1;
+ unsigned char SrcReg[3];
+};
+
+struct branchinfo {
+ unsigned int HaveElse:1;
+
+ struct updatemask_state StoreEndif;
+ struct updatemask_state StoreElse;
+};
+
+struct deadcode_state {
+ struct radeon_compiler * C;
+ struct instruction_state * Instructions;
+
+ struct updatemask_state R;
+
+ struct branchinfo * BranchStack;
+ unsigned int BranchStackSize;
+ unsigned int BranchStackReserved;
+};
+
+
+static void or_updatemasks(
+ struct updatemask_state * dst,
+ struct updatemask_state * a,
+ struct updatemask_state * b)
+{
+ for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
+ dst->Output[i] = a->Output[i] | b->Output[i];
+ dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
+ }
+
+ for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
+ dst->Special[i] = a->Special[i] | b->Special[i];
+
+ dst->Address = a->Address | b->Address;
+}
+
+static void push_branch(struct deadcode_state * s)
+{
+ memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
+ s->BranchStackSize, s->BranchStackReserved, 1);
+
+ struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++];
+ branch->HaveElse = 0;
+ memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
+}
+
+static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
+{
+ if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
+ if (index >= RC_REGISTER_MAX_INDEX) {
+ rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
+ return 0;
+ }
+
+ if (file == RC_FILE_OUTPUT)
+ return &s->R.Output[index];
+ else
+ return &s->R.Temporary[index];
+ } else if (file == RC_FILE_ADDRESS) {
+ return &s->R.Address;
+ } else if (file == RC_FILE_SPECIAL) {
+ if (index >= RC_NUM_SPECIAL_REGISTERS) {
+ rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
+ return 0;
+ }
+
+ return &s->R.Special[index];
+ }
+
+ return 0;
+}
+
+static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
+{
+ unsigned char * pused = get_used_ptr(s, file, index);
+ if (pused)
+ *pused |= mask;
+}
+
+static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ struct instruction_state * insts = &s->Instructions[inst->IP];
+ unsigned int usedmask = 0;
+
+ if (opcode->HasDstReg) {
+ unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
+ if (pused) {
+ usedmask = *pused & inst->U.I.DstReg.WriteMask;
+ *pused &= ~usedmask;
+ }
+ }
+
+ insts->WriteMask |= usedmask;
+
+ if (inst->U.I.WriteALUResult) {
+ unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
+ if (pused && *pused) {
+ if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+ usedmask |= RC_MASK_X;
+ else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+ usedmask |= RC_MASK_W;
+
+ *pused = 0;
+ insts->WriteALUResult = 1;
+ }
+ }
+
+ unsigned int srcmasks[3];
+ rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ unsigned int refmask = 0;
+ unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
+ insts->SrcReg[src] |= newsrcmask;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(newsrcmask, chan))
+ refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+ }
+
+ /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+ refmask &= RC_MASK_XYZW;
+
+ if (!refmask)
+ continue;
+
+ mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
+
+ if (inst->U.I.SrcReg[src].RelAddr)
+ mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
+ }
+}
+
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+ struct deadcode_state * s = data;
+
+ mark_used(s, RC_FILE_OUTPUT, index, mask);
+}
+
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata)
+{
+ struct deadcode_state s;
+ unsigned int nr_instructions;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+
+ nr_instructions = rc_recompute_ips(c);
+ s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
+ memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
+
+ dce(userdata, &s, &mark_output_use);
+
+ for(struct rc_instruction * inst = c->Program.Instructions.Prev;
+ inst != &c->Program.Instructions;
+ inst = inst->Prev) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->IsFlowControl) {
+ if (opcode->Opcode == RC_OPCODE_ENDIF) {
+ push_branch(&s);
+ } else {
+ if (s.BranchStackSize) {
+ struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+
+ if (opcode->Opcode == RC_OPCODE_IF) {
+ or_updatemasks(&s.R,
+ &s.R,
+ branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+ s.BranchStackSize--;
+ } else if (opcode->Opcode == RC_OPCODE_ELSE) {
+ if (branch->HaveElse) {
+ rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
+ } else {
+ memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
+ memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
+ branch->HaveElse = 1;
+ }
+ } else {
+ rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+ }
+ } else {
+ rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__);
+ }
+ }
+ }
+
+ update_instruction(&s, inst);
+ }
+
+ unsigned int ip = 0;
+ for(struct rc_instruction * inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next, ++ip) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ int dead = 1;
+
+ if (!opcode->HasDstReg) {
+ dead = 0;
+ } else {
+ inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
+ if (s.Instructions[ip].WriteMask)
+ dead = 0;
+
+ if (s.Instructions[ip].WriteALUResult)
+ dead = 0;
+ else
+ inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
+ }
+
+ if (dead) {
+ struct rc_instruction * todelete = inst;
+ inst = inst->Prev;
+ rc_remove_instruction(todelete);
+ continue;
+ }
+
+ unsigned int srcmasks[3];
+ unsigned int usemask = s.Instructions[ip].WriteMask;
+
+ if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+ usemask |= RC_MASK_X;
+ else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+ usemask |= RC_MASK_W;
+
+ rc_compute_sources_for_writemask(inst, usemask, srcmasks);
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(srcmasks[src], chan))
+ SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ }
+ }
+ }
+
+ rc_calculate_inputs_outputs(c);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644
index 0000000000..33acbd30f4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+static void rewrite_source(struct radeon_compiler * c,
+ struct rc_instruction * inst, unsigned src)
+{
+ struct rc_swizzle_split split;
+ unsigned int tempreg = rc_find_free_temporary(c);
+ unsigned int usemask;
+
+ usemask = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+ usemask |= 1 << chan;
+ }
+
+ c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
+
+ for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
+ struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+ unsigned int phase_refmask;
+ unsigned int masked_negate;
+
+ mov->U.I.Opcode = RC_OPCODE_MOV;
+ mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ mov->U.I.DstReg.Index = tempreg;
+ mov->U.I.DstReg.WriteMask = split.Phase[phase];
+ mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+
+ phase_refmask = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(split.Phase[phase], chan))
+ SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ else
+ phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan);
+ }
+
+ phase_refmask &= RC_MASK_XYZW;
+
+ masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
+ if (masked_negate == 0)
+ mov->U.I.SrcReg[0].Negate = 0;
+ else if (masked_negate == split.Phase[phase])
+ mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+ }
+
+ inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[src].Index = tempreg;
+ inst->U.I.SrcReg[src].Swizzle = 0;
+ inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
+ inst->U.I.SrcReg[src].Abs = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
+ GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+ }
+}
+
+void rc_dataflow_swizzles(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int src;
+
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+ rewrite_source(c, inst, src);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
new file mode 100644
index 0000000000..863654cf68
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_emulate_branches.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct proxy_info {
+ unsigned int Proxied:1;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct register_proxies {
+ struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
+};
+
+struct branch_info {
+ struct rc_instruction * If;
+ struct rc_instruction * Else;
+};
+
+struct emulate_branch_state {
+ struct radeon_compiler * C;
+
+ struct branch_info * Branches;
+ unsigned int BranchCount;
+ unsigned int BranchReserved;
+};
+
+
+static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+ s->Branches, s->BranchCount, s->BranchReserved, 1);
+
+ DBG("%s\n", __FUNCTION__);
+
+ struct branch_info * branch = &s->Branches[s->BranchCount++];
+ memset(branch, 0, sizeof(struct branch_info));
+ branch->If = inst;
+
+ /* Make a safety copy of the decision register, because we will need
+ * it at ENDIF time and it might be overwritten in both branches. */
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_X;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+ inst->U.I.SrcReg[0].Swizzle = 0;
+ inst->U.I.SrcReg[0].Abs = 0;
+ inst->U.I.SrcReg[0].Negate = 0;
+}
+
+static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ if (!s->BranchCount) {
+ rc_error(s->C, "Encountered ELSE outside of branches");
+ return;
+ }
+
+ DBG("%s\n", __FUNCTION__);
+
+ struct branch_info * branch = &s->Branches[s->BranchCount - 1];
+ branch->Else = inst;
+}
+
+
+struct state_and_proxies {
+ struct emulate_branch_state * S;
+ struct register_proxies * Proxies;
+};
+
+static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
+ rc_register_file file, unsigned int index)
+{
+ if (file == RC_FILE_TEMPORARY) {
+ return &sap->Proxies->Temporary[index];
+ } else {
+ return 0;
+ }
+}
+
+static void scan_write(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int comp)
+{
+ struct state_and_proxies * sap = userdata;
+ struct proxy_info * proxy = get_proxy_info(sap, file, index);
+
+ if (proxy && !proxy->Proxied) {
+ proxy->Proxied = 1;
+ proxy->Index = rc_find_free_temporary(sap->S->C);
+ }
+}
+
+static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
+ rc_register_file * pfile, unsigned int * pindex)
+{
+ struct state_and_proxies * sap = userdata;
+ struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex);
+
+ if (proxy && proxy->Proxied) {
+ *pfile = RC_FILE_TEMPORARY;
+ *pindex = proxy->Index;
+ }
+}
+
+/**
+ * Redirect all writes in the instruction range [begin, end) to proxy
+ * temporary registers.
+ */
+static void allocate_and_insert_proxies(struct emulate_branch_state * s,
+ struct register_proxies * proxies,
+ struct rc_instruction * begin,
+ struct rc_instruction * end)
+{
+ struct state_and_proxies sap;
+
+ sap.S = s;
+ sap.Proxies = proxies;
+
+ for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+ rc_for_all_writes_mask(inst, scan_write, &sap);
+ rc_remap_registers(inst, remap_proxy_function, &sap);
+ }
+
+ for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+ if (proxies->Temporary[index].Proxied) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = index;
+ }
+ }
+}
+
+
+static void inject_cmp(struct emulate_branch_state * s,
+ struct rc_instruction * inst_if,
+ struct rc_instruction * inst_endif,
+ rc_register_file file, unsigned int index,
+ struct proxy_info ifproxy,
+ struct proxy_info elseproxy)
+{
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif);
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+ inst_cmp->U.I.DstReg.File = file;
+ inst_cmp->U.I.DstReg.Index = index;
+ inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+ inst_cmp->U.I.SrcReg[0].Abs = 1;
+ inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+ inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index;
+ inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index;
+}
+
+static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ if (!s->BranchCount) {
+ rc_error(s->C, "Encountered ENDIF outside of branches");
+ return;
+ }
+
+ DBG("%s\n", __FUNCTION__);
+
+ struct branch_info * branch = &s->Branches[s->BranchCount - 1];
+ struct register_proxies IfProxies;
+ struct register_proxies ElseProxies;
+
+ memset(&IfProxies, 0, sizeof(IfProxies));
+ memset(&ElseProxies, 0, sizeof(ElseProxies));
+
+ allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
+
+ if (branch->Else)
+ allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);
+
+ /* Insert the CMP instructions at the end. */
+ for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+ if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
+ inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
+ IfProxies.Temporary[index], ElseProxies.Temporary[index]);
+ }
+ }
+
+ /* Remove all traces of the branch instructions */
+ rc_remove_instruction(branch->If);
+ if (branch->Else)
+ rc_remove_instruction(branch->Else);
+ rc_remove_instruction(inst);
+
+ s->BranchCount--;
+
+ if (VERBOSE) {
+ DBG("Program after ENDIF handling:\n");
+ rc_print_program(&s->C->Program);
+ }
+}
+
+
+struct remap_output_data {
+ unsigned int Output:RC_REGISTER_INDEX_BITS;
+ unsigned int Temporary:RC_REGISTER_INDEX_BITS;
+};
+
+static void remap_output_function(void * userdata, struct rc_instruction * inst,
+ rc_register_file * pfile, unsigned int * pindex)
+{
+ struct remap_output_data * data = userdata;
+
+ if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) {
+ *pfile = RC_FILE_TEMPORARY;
+ *pindex = data->Temporary;
+ }
+}
+
+
+/**
+ * Output registers cannot be read from and so cannot be dealt with like
+ * temporary registers.
+ *
+ * We do the simplest thing: If an output registers is written within
+ * a branch, then *all* writes to this register are proxied to a
+ * temporary register, and a final MOV is appended to the end of
+ * the program.
+ */
+static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ if (!s->BranchCount)
+ return;
+
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (!opcode->HasDstReg)
+ return;
+
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
+ struct remap_output_data remap;
+
+ remap.Output = inst->U.I.DstReg.Index;
+ remap.Temporary = rc_find_free_temporary(s->C);
+
+ for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_remap_registers(inst, &remap_output_function, &remap);
+ }
+
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst_mov->U.I.DstReg.Index = remap.Output;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = remap.Temporary;
+ }
+}
+
+/**
+ * Remove branch instructions; instead, execute both branches
+ * on different register sets and choose between their results
+ * using CMP instructions in place of the original ENDIF.
+ */
+void rc_emulate_branches(struct radeon_compiler * c)
+{
+ struct emulate_branch_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+
+ /* Untypical loop because we may remove the current instruction */
+ struct rc_instruction * ptr = c->Program.Instructions.Next;
+ while(ptr != &c->Program.Instructions) {
+ struct rc_instruction * inst = ptr;
+ ptr = ptr->Next;
+
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_IF:
+ handle_if(&s, inst);
+ break;
+ case RC_OPCODE_ELSE:
+ handle_else(&s, inst);
+ break;
+ case RC_OPCODE_ENDIF:
+ handle_endif(&s, inst);
+ break;
+ default:
+ fix_output_writes(&s, inst);
+ break;
+ }
+ } else {
+ rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h
new file mode 100644
index 0000000000..e07279f093
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_EMULATE_BRANCHES_H
+#define RADEON_EMULATE_BRANCHES_H
+
+struct radeon_compiler;
+
+void rc_emulate_branches(struct radeon_compiler * c);
+
+#endif /* RADEON_EMULATE_BRANCHES_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
new file mode 100644
index 0000000000..4c5d29f421
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct emulate_loop_state {
+ struct radeon_compiler * C;
+ struct loop_info * Loops;
+ unsigned int LoopCount;
+ unsigned int LoopReserved;
+};
+
+struct loop_info {
+ struct rc_instruction * BeginLoop;
+ struct rc_instruction * Cond;
+ struct rc_instruction * If;
+ struct rc_instruction * Brk;
+ struct rc_instruction * EndIf;
+ struct rc_instruction * EndLoop;
+};
+
+struct const_value {
+
+ struct radeon_compiler * C;
+ struct rc_src_register * Src;
+ float Value;
+ int HasValue;
+};
+
+struct count_inst {
+ struct radeon_compiler * C;
+ int Index;
+ rc_swizzle Swz;
+ float Amount;
+ int Unknown;
+};
+
+static float get_constant_value(struct radeon_compiler * c,
+ struct rc_src_register * src,
+ int chan)
+{
+ float base = 1.0f;
+ int swz = GET_SWZ(src->Swizzle, chan);
+ if(swz >= 4 || src->Index >= c->Program.Constants.Count ){
+ rc_error(c, "get_constant_value: Can't find a value.\n");
+ return 0.0f;
+ }
+ if(GET_BIT(src->Negate, chan)){
+ base = -1.0f;
+ }
+ return base *
+ c->Program.Constants.Constants[src->Index].u.Immediate[swz];
+}
+
+static int src_reg_is_immediate(struct rc_src_register * src,
+ struct radeon_compiler * c)
+{
+ return src->File == RC_FILE_CONSTANT &&
+ c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
+}
+
+static unsigned int loop_count_instructions(struct loop_info * loop)
+{
+ unsigned int count = 0;
+ struct rc_instruction * inst = loop->BeginLoop->Next;
+ while(inst != loop->EndLoop){
+ count++;
+ inst = inst->Next;
+ }
+ return count;
+}
+
+static unsigned int loop_calc_iterations(struct loop_info * loop,
+ unsigned int loop_count, unsigned int max_instructions)
+{
+ unsigned int icount = loop_count_instructions(loop);
+ return max_instructions / (loop_count * icount);
+}
+
+static void loop_unroll(struct emulate_loop_state * s,
+ struct loop_info *loop, unsigned int iterations)
+{
+ unsigned int i;
+ struct rc_instruction * ptr;
+ struct rc_instruction * first = loop->BeginLoop->Next;
+ struct rc_instruction * last = loop->EndLoop->Prev;
+ struct rc_instruction * append_to = last;
+ rc_remove_instruction(loop->BeginLoop);
+ rc_remove_instruction(loop->EndLoop);
+ for( i = 1; i < iterations; i++){
+ for(ptr = first; ptr != last->Next; ptr = ptr->Next){
+ struct rc_instruction *new = rc_alloc_instruction(s->C);
+ memcpy(new, ptr, sizeof(struct rc_instruction));
+ rc_insert_instruction(append_to, new);
+ append_to = new;
+ }
+ }
+}
+
+
+static void update_const_value(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct const_value * value = data;
+ if(value->Src->File != file ||
+ value->Src->Index != index ||
+ !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
+ return;
+ }
+ switch(inst->U.I.Opcode){
+ case RC_OPCODE_MOV:
+ if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
+ return;
+ }
+ value->HasValue = 1;
+ value->Value =
+ get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
+ break;
+ }
+}
+
+static void get_incr_amount(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct count_inst * count_inst = data;
+ int amnt_src_index;
+ const struct rc_opcode_info * opcode;
+ float amount;
+
+ if(file != RC_FILE_TEMPORARY ||
+ count_inst->Index != index ||
+ (1 << GET_SWZ(count_inst->Swz,0) != mask)){
+ return;
+ }
+ /* Find the index of the counter register. */
+ opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ if(opcode->NumSrcRegs != 2){
+ count_inst->Unknown = 1;
+ return;
+ }
+ if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[0].Index == count_inst->Index &&
+ inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
+ amnt_src_index = 1;
+ } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[1].Index == count_inst->Index &&
+ inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
+ amnt_src_index = 0;
+ }
+ else{
+ count_inst->Unknown = 1;
+ return;
+ }
+ if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
+ count_inst->C)){
+ amount = get_constant_value(count_inst->C,
+ &inst->U.I.SrcReg[amnt_src_index], 0);
+ }
+ else{
+ count_inst->Unknown = 1 ;
+ return;
+ }
+ switch(inst->U.I.Opcode){
+ case RC_OPCODE_ADD:
+ count_inst->Amount += amount;
+ break;
+ case RC_OPCODE_SUB:
+ if(amnt_src_index == 0){
+ count_inst->Unknown = 0;
+ return;
+ }
+ count_inst->Amount -= amount;
+ break;
+ default:
+ count_inst->Unknown = 1;
+ return;
+ }
+
+}
+
+static int transform_const_loop(struct emulate_loop_state * s,
+ struct loop_info * loop,
+ struct rc_instruction * cond)
+{
+ int end_loops = 1;
+ int iterations;
+ struct count_inst count_inst;
+ float limit_value;
+ struct rc_src_register * counter;
+ struct rc_src_register * limit;
+ struct const_value counter_value;
+ struct rc_instruction * inst;
+
+ /* Find the counter and the upper limit */
+
+ if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){
+ limit = &cond->U.I.SrcReg[0];
+ counter = &cond->U.I.SrcReg[1];
+ }
+ else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){
+ limit = &cond->U.I.SrcReg[1];
+ counter = &cond->U.I.SrcReg[0];
+ }
+ else{
+ DBG("No constant limit.\n");
+ return 0;
+ }
+
+ /* Find the initial value of the counter */
+ counter_value.Src = counter;
+ counter_value.Value = 0.0f;
+ counter_value.HasValue = 0;
+ counter_value.C = s->C;
+ for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+ inst = inst->Next){
+ rc_for_all_writes_mask(inst, update_const_value, &counter_value);
+ }
+ if(!counter_value.HasValue){
+ DBG("Initial counter value cannot be determined.\n");
+ return 0;
+ }
+ DBG("Initial counter value is %f\n", counter_value.Value);
+ /* Determine how the counter is modified each loop */
+ count_inst.C = s->C;
+ count_inst.Index = counter->Index;
+ count_inst.Swz = counter->Swizzle;
+ count_inst.Amount = 0.0f;
+ count_inst.Unknown = 0;
+ for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
+ switch(inst->U.I.Opcode){
+ /* XXX In the future we might want to try to unroll nested
+ * loops here.*/
+ case RC_OPCODE_BGNLOOP:
+ end_loops++;
+ break;
+ case RC_OPCODE_ENDLOOP:
+ loop->EndLoop = inst;
+ end_loops--;
+ break;
+ /* XXX Check if the counter is modified within an if statement.
+ */
+ case RC_OPCODE_IF:
+ break;
+ default:
+ rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
+ if(count_inst.Unknown){
+ return 0;
+ }
+ break;
+ }
+ }
+ /* Infinite loop */
+ if(count_inst.Amount == 0.0f){
+ return 0;
+ }
+ DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
+ /* Calculate the number of iterations of this loop. Keeping this
+ * simple, since we only support increment and decrement loops.
+ */
+ limit_value = get_constant_value(s->C, limit, 0);
+ iterations = (int) ((limit_value - counter_value.Value) /
+ count_inst.Amount);
+
+ DBG("Loop will have %d iterations.\n", iterations);
+
+ /* Prepare loop for unrolling */
+ rc_remove_instruction(loop->Cond);
+ rc_remove_instruction(loop->If);
+ rc_remove_instruction(loop->Brk);
+ rc_remove_instruction(loop->EndIf);
+
+ loop_unroll(s, loop, iterations);
+ loop->EndLoop = NULL;
+ return 1;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP; -> BGNLOOP;
+ * <Additional conditional code> -> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0]; -> IF temp[0];
+ * BRK; ->
+ * ENDIF; -> <Loop Body>
+ * <Loop Body> -> ENDIF;
+ * ENDLOOP; -> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return If the loop can be unrolled, a pointer to the first instruction of
+ * the unrolled loop.
+ * Otherwise, A pointer to the ENDLOOP instruction.
+ * Null if there is an error.
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+ struct rc_instruction * inst)
+{
+ struct loop_info *loop;
+ struct rc_instruction * ptr;
+
+ memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+ s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+ loop = &s->Loops[s->LoopCount++];
+ memset(loop, 0, sizeof(struct loop_info));
+ if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
+ rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
+ return NULL;
+ }
+ loop->BeginLoop = inst;
+
+ for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
+ switch(ptr->U.I.Opcode){
+ case RC_OPCODE_BGNLOOP:
+ /* Nested loop */
+ ptr = transform_loop(s, ptr);
+ if(!ptr){
+ return NULL;
+ }
+ break;
+ case RC_OPCODE_BRK:
+ loop->Brk = ptr;
+ if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
+ rc_error(s->C,
+ "%s: expected ENDIF\n",__FUNCTION__);
+ return NULL;
+ }
+ loop->EndIf = ptr->Next;
+ if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
+ rc_error(s->C,
+ "%s: expected IF\n", __FUNCTION__);
+ return NULL;
+ }
+ loop->If = ptr->Prev;
+ switch(loop->If->Prev->U.I.Opcode){
+ case RC_OPCODE_SLT:
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLE:
+ case RC_OPCODE_SEQ:
+ case RC_OPCODE_SNE:
+ break;
+ default:
+ rc_error(s->C, "%s expected conditional\n",
+ __FUNCTION__);
+ return NULL;
+ }
+ loop->Cond = loop->If->Prev;
+ ptr = loop->EndIf;
+ break;
+ case RC_OPCODE_ENDLOOP:
+ loop->EndLoop = ptr;
+ break;
+ }
+ }
+ /* Reverse the conditional instruction */
+ switch(loop->Cond->U.I.Opcode){
+ case RC_OPCODE_SGE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
+ break;
+ case RC_OPCODE_SLT:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
+ break;
+ case RC_OPCODE_SLE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
+ break;
+ case RC_OPCODE_SGT:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
+ break;
+ case RC_OPCODE_SEQ:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
+ break;
+ case RC_OPCODE_SNE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
+ break;
+ default:
+ rc_error(s->C, "loop->Cond is not a conditional.\n");
+ return NULL;
+ }
+
+ /* Check if the number of loops is known at compile time. */
+ if(transform_const_loop(s, loop, ptr)){
+ return loop->BeginLoop->Next;
+ }
+
+ /* Prepare the loop to be unrolled */
+ rc_remove_instruction(loop->Brk);
+ rc_remove_instruction(loop->EndIf);
+ rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
+ return loop->EndLoop;
+}
+
+static void rc_transform_loops(struct emulate_loop_state * s)
+{
+ struct rc_instruction * ptr = s->C->Program.Instructions.Next;
+ while(ptr != &s->C->Program.Instructions) {
+ if(ptr->Type == RC_INSTRUCTION_NORMAL &&
+ ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ ptr = transform_loop(s, ptr);
+ if(!ptr){
+ return;
+ }
+ }
+ ptr = ptr->Next;
+ }
+}
+
+static void rc_unroll_loops(struct emulate_loop_state *s,
+ unsigned int max_instructions)
+{
+ int i;
+ /* Iterate backwards of the list of loops so that loops that nested
+ * loops are unrolled first.
+ */
+ for( i = s->LoopCount - 1; i >= 0; i-- ){
+ if(!s->Loops[i].EndLoop){
+ continue;
+ }
+ unsigned int iterations = loop_calc_iterations(&s->Loops[i],
+ s->LoopCount, max_instructions);
+ loop_unroll(s, &s->Loops[i], iterations);
+ }
+}
+
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
+{
+ struct emulate_loop_state s;
+
+ memset(&s, 0, sizeof(struct emulate_loop_state));
+ s.C = c;
+
+ /* We may need to move these two operations to r3xx_(vert|frag)prog.c
+ * and run the optimization passes between them in order to increase
+ * the number of unrolls we can do for each loop.
+ */
+ rc_transform_loops(&s);
+
+ rc_unroll_loops(&s, max_instructions);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
new file mode 100644
index 0000000000..ddcf1c0fab
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -0,0 +1,12 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
new file mode 100644
index 0000000000..1dc16855dc
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+#include "radeon_program_constants.h"
+
+struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
+ {
+ .Opcode = RC_OPCODE_NOP,
+ .Name = "NOP"
+ },
+ {
+ .Opcode = RC_OPCODE_ILLEGAL_OPCODE,
+ .Name = "ILLEGAL OPCODE"
+ },
+ {
+ .Opcode = RC_OPCODE_ABS,
+ .Name = "ABS",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_ADD,
+ .Name = "ADD",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_ARL,
+ .Name = "ARL",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_CEIL,
+ .Name = "CEIL",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_CMP,
+ .Name = "CMP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_COS,
+ .Name = "COS",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DDX,
+ .Name = "DDX",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DDY,
+ .Name = "DDY",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DP3,
+ .Name = "DP3",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DP4,
+ .Name = "DP4",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DPH,
+ .Name = "DPH",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DST,
+ .Name = "DST",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_EX2,
+ .Name = "EX2",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_EXP,
+ .Name = "EXP",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_FLR,
+ .Name = "FLR",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_FRC,
+ .Name = "FRC",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_KIL,
+ .Name = "KIL",
+ .NumSrcRegs = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LG2,
+ .Name = "LG2",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LIT,
+ .Name = "LIT",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LOG,
+ .Name = "LOG",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LRP,
+ .Name = "LRP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MAD,
+ .Name = "MAD",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MAX,
+ .Name = "MAX",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MIN,
+ .Name = "MIN",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MOV,
+ .Name = "MOV",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MUL,
+ .Name = "MUL",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_POW,
+ .Name = "POW",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_RCP,
+ .Name = "RCP",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_RSQ,
+ .Name = "RSQ",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SCS,
+ .Name = "SCS",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SEQ,
+ .Name = "SEQ",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SFL,
+ .Name = "SFL",
+ .NumSrcRegs = 0,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SGE,
+ .Name = "SGE",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SGT,
+ .Name = "SGT",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SIN,
+ .Name = "SIN",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SLE,
+ .Name = "SLE",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SLT,
+ .Name = "SLT",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SNE,
+ .Name = "SNE",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SUB,
+ .Name = "SUB",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SWZ,
+ .Name = "SWZ",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_XPD,
+ .Name = "XPD",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TEX,
+ .Name = "TEX",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXB,
+ .Name = "TXB",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXD,
+ .Name = "TXD",
+ .HasTexture = 1,
+ .NumSrcRegs = 3,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXL,
+ .Name = "TXL",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXP,
+ .Name = "TXP",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_IF,
+ .Name = "IF",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 1
+ },
+ {
+ .Opcode = RC_OPCODE_ELSE,
+ .Name = "ELSE",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_ENDIF,
+ .Name = "ENDIF",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_BGNLOOP,
+ .Name = "BGNLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_BRK,
+ .Name = "BRK",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_ENDLOOP,
+ .Name = "ENDLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0,
+ },
+ {
+ .Opcode = RC_OPCODE_REPL_ALPHA,
+ .Name = "REPL_ALPHA",
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_BEGIN_TEX,
+ .Name = "BEGIN_TEX"
+ }
+};
+
+void rc_compute_sources_for_writemask(
+ const struct rc_instruction *inst,
+ unsigned int writemask,
+ unsigned int *srcmasks)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ srcmasks[0] = 0;
+ srcmasks[1] = 0;
+ srcmasks[2] = 0;
+
+ if (opcode->Opcode == RC_OPCODE_KIL)
+ srcmasks[0] |= RC_MASK_XYZW;
+ else if (opcode->Opcode == RC_OPCODE_IF)
+ srcmasks[0] |= RC_MASK_X;
+
+ if (!writemask)
+ return;
+
+ if (opcode->IsComponentwise) {
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= writemask;
+ } else if (opcode->IsStandardScalar) {
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= RC_MASK_X;
+ } else {
+ switch(opcode->Opcode) {
+ case RC_OPCODE_ARL:
+ srcmasks[0] |= RC_MASK_X;
+ break;
+ case RC_OPCODE_DP3:
+ srcmasks[0] |= RC_MASK_XYZ;
+ srcmasks[1] |= RC_MASK_XYZ;
+ break;
+ case RC_OPCODE_DP4:
+ srcmasks[0] |= RC_MASK_XYZW;
+ srcmasks[1] |= RC_MASK_XYZW;
+ break;
+ case RC_OPCODE_TXB:
+ case RC_OPCODE_TXP:
+ srcmasks[0] |= RC_MASK_W;
+ /* Fall through */
+ case RC_OPCODE_TEX:
+ switch (inst->U.I.TexSrcTarget) {
+ case RC_TEXTURE_1D:
+ srcmasks[0] |= RC_MASK_X;
+ break;
+ case RC_TEXTURE_2D:
+ case RC_TEXTURE_RECT:
+ case RC_TEXTURE_1D_ARRAY:
+ srcmasks[0] |= RC_MASK_XY;
+ break;
+ case RC_TEXTURE_3D:
+ case RC_TEXTURE_CUBE:
+ case RC_TEXTURE_2D_ARRAY:
+ srcmasks[0] |= RC_MASK_XYZ;
+ break;
+ }
+ break;
+ case RC_OPCODE_DST:
+ srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
+ srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
+ break;
+ case RC_OPCODE_EXP:
+ case RC_OPCODE_LOG:
+ srcmasks[0] |= RC_MASK_XY;
+ break;
+ case RC_OPCODE_LIT:
+ srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
+ break;
+ default:
+ break;
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
new file mode 100644
index 0000000000..91c82ac089
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_OPCODES_H
+#define RADEON_OPCODES_H
+
+#include <assert.h>
+
+/**
+ * Opcodes understood by the Radeon compiler.
+ */
+typedef enum {
+ RC_OPCODE_NOP = 0,
+ RC_OPCODE_ILLEGAL_OPCODE,
+
+ /** vec4 instruction: dst.c = abs(src0.c); */
+ RC_OPCODE_ABS,
+
+ /** vec4 instruction: dst.c = src0.c + src1.c; */
+ RC_OPCODE_ADD,
+
+ /** special instruction: load address register
+ * dst.x = floor(src.x), where dst must be an address register */
+ RC_OPCODE_ARL,
+
+ /** vec4 instruction: dst.c = ceil(src0.c) */
+ RC_OPCODE_CEIL,
+
+ /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
+ RC_OPCODE_CMP,
+
+ /** scalar instruction: dst = cos(src0.x) */
+ RC_OPCODE_COS,
+
+ /** special instruction: take vec4 partial derivative in X direction
+ * dst.c = d src0.c / dx */
+ RC_OPCODE_DDX,
+
+ /** special instruction: take vec4 partial derivative in Y direction
+ * dst.c = d src0.c / dy */
+ RC_OPCODE_DDY,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
+ RC_OPCODE_DP3,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
+ RC_OPCODE_DP4,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
+ RC_OPCODE_DPH,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_DST,
+
+ /** scalar instruction: dst = 2**src0.x */
+ RC_OPCODE_EX2,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_EXP,
+
+ /** vec4 instruction: dst.c = floor(src0.c) */
+ RC_OPCODE_FLR,
+
+ /** vec4 instruction: dst.c = src0.c - floor(src0.c) */
+ RC_OPCODE_FRC,
+
+ /** special instruction: stop execution if any component of src0 is negative */
+ RC_OPCODE_KIL,
+
+ /** scalar instruction: dst = log_2(src0.x) */
+ RC_OPCODE_LG2,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_LIT,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_LOG,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
+ RC_OPCODE_LRP,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
+ RC_OPCODE_MAD,
+
+ /** vec4 instruction: dst.c = max(src0.c, src1.c) */
+ RC_OPCODE_MAX,
+
+ /** vec4 instruction: dst.c = min(src0.c, src1.c) */
+ RC_OPCODE_MIN,
+
+ /** vec4 instruction: dst.c = src0.c */
+ RC_OPCODE_MOV,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c */
+ RC_OPCODE_MUL,
+
+ /** scalar instruction: dst = src0.x ** src1.x */
+ RC_OPCODE_POW,
+
+ /** scalar instruction: dst = 1 / src0.x */
+ RC_OPCODE_RCP,
+
+ /** scalar instruction: dst = 1 / sqrt(src0.x) */
+ RC_OPCODE_RSQ,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_SCS,
+
+ /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SEQ,
+
+ /** vec4 instruction: dst.c = 0.0 */
+ RC_OPCODE_SFL,
+
+ /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SGE,
+
+ /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SGT,
+
+ /** scalar instruction: dst = sin(src0.x) */
+ RC_OPCODE_SIN,
+
+ /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SLE,
+
+ /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SLT,
+
+ /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SNE,
+
+ /** vec4 instruction: dst.c = src0.c - src1.c */
+ RC_OPCODE_SUB,
+
+ /** vec4 instruction: dst.c = src0.c */
+ RC_OPCODE_SWZ,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_XPD,
+
+ RC_OPCODE_TEX,
+ RC_OPCODE_TXB,
+ RC_OPCODE_TXD,
+ RC_OPCODE_TXL,
+ RC_OPCODE_TXP,
+
+ /** branch instruction:
+ * If src0.x != 0.0, continue with the next instruction;
+ * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
+ */
+ RC_OPCODE_IF,
+
+ /** branch instruction: jump to matching RC_OPCODE_ENDIF */
+ RC_OPCODE_ELSE,
+
+ /** branch instruction: has no effect */
+ RC_OPCODE_ENDIF,
+
+ RC_OPCODE_BGNLOOP,
+
+ RC_OPCODE_BRK,
+
+ RC_OPCODE_ENDLOOP,
+
+ /** special instruction, used in R300-R500 fragment program pair instructions
+ * indicates that the result of the alpha operation shall be replicated
+ * across all other channels */
+ RC_OPCODE_REPL_ALPHA,
+
+ /** special instruction, used in R300-R500 fragment programs
+ * to indicate the start of a block of texture instructions that
+ * can run simultaneously. */
+ RC_OPCODE_BEGIN_TEX,
+
+ MAX_RC_OPCODE
+} rc_opcode;
+
+
+struct rc_opcode_info {
+ rc_opcode Opcode;
+ const char * Name;
+
+ /** true if the instruction reads from a texture.
+ *
+ * \note This is false for the KIL instruction, even though KIL is
+ * a texture instruction from a hardware point of view. */
+ unsigned int HasTexture:1;
+
+ unsigned int NumSrcRegs:2;
+ unsigned int HasDstReg:1;
+
+ /** true if this instruction affects control flow */
+ unsigned int IsFlowControl:1;
+
+ /** true if this is a vector instruction that operates on components in parallel
+ * without any cross-component interaction */
+ unsigned int IsComponentwise:1;
+
+ /** true if this instruction sources only its operands X components
+ * to compute one result which is smeared across all output channels */
+ unsigned int IsStandardScalar:1;
+};
+
+extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
+
+static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
+{
+ assert((unsigned int)opcode < MAX_RC_OPCODE);
+ assert(rc_opcodes[opcode].Opcode == opcode);
+
+ return &rc_opcodes[opcode];
+}
+
+struct rc_instruction;
+
+void rc_compute_sources_for_writemask(
+ const struct rc_instruction *inst,
+ unsigned int writemask,
+ unsigned int *srcmasks);
+
+#endif /* RADEON_OPCODES_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
new file mode 100644
index 0000000000..21d7210888
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+ struct rc_src_register combine;
+ combine.File = inner.File;
+ combine.Index = inner.Index;
+ combine.RelAddr = inner.RelAddr;
+ if (outer.Abs) {
+ combine.Abs = 1;
+ combine.Negate = outer.Negate;
+ } else {
+ combine.Abs = inner.Abs;
+ combine.Negate = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(outer.Swizzle, chan);
+ if (swz < 4)
+ combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
+ }
+ combine.Negate ^= outer.Negate;
+ }
+ combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+ return combine;
+}
+
+struct peephole_state {
+ struct radeon_compiler * C;
+ struct rc_instruction * Mov;
+ unsigned int Conflict:1;
+
+ /** Whether Mov's source has been clobbered */
+ unsigned int SourceClobbered:1;
+
+ /** Which components of Mov's destination register are still from that Mov? */
+ unsigned int MovMask:4;
+
+ /** Which components of Mov's destination register are clearly *not* from that Mov */
+ unsigned int DefinedMask:4;
+
+ /** Which components of Mov's source register are sourced */
+ unsigned int SourcedMask:4;
+
+ /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
+ int BranchDepth;
+};
+
+static void peephole_scan_read(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct peephole_state * s = data;
+
+ if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
+ return;
+
+ /* These instructions cannot read from the constants file.
+ * see radeonTransformTEX()
+ */
+ if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+ s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+ (inst->U.I.Opcode == RC_OPCODE_TEX ||
+ inst->U.I.Opcode == RC_OPCODE_TXB ||
+ inst->U.I.Opcode == RC_OPCODE_TXP ||
+ inst->U.I.Opcode == RC_OPCODE_KIL)){
+ s->Conflict = 1;
+ return;
+ }
+ if ((mask & s->MovMask) == mask) {
+ if (s->SourceClobbered) {
+ s->Conflict = 1;
+ }
+ } else if ((mask & s->DefinedMask) == mask) {
+ /* read from something entirely written by other instruction: this is okay */
+ } else {
+ /* read from component combination that is not well-defined without
+ * the MOV: cannot remove it */
+ s->Conflict = 1;
+ }
+}
+
+static void peephole_scan_write(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct peephole_state * s = data;
+
+ if (s->BranchDepth < 0)
+ return;
+
+ if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
+ s->MovMask &= ~mask;
+ if (s->BranchDepth == 0)
+ s->DefinedMask |= mask;
+ else
+ s->DefinedMask &= ~mask;
+ }
+ if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+ if (mask & s->SourcedMask)
+ s->SourceClobbered = 1;
+ } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
+ s->SourceClobbered = 1;
+ }
+}
+
+static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+ struct peephole_state s;
+
+ if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult)
+ return;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+ s.Mov = inst_mov;
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
+ s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ /* 1st pass: Check whether all subsequent readers can be changed */
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads_mask(inst, peephole_scan_read, &s);
+ rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+ if (s.Conflict)
+ return;
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0) {
+ s.DefinedMask &= ~s.MovMask;
+ s.MovMask = 0;
+ }
+ }
+ }
+ }
+
+ if (s.Conflict)
+ return;
+
+ /* 2nd pass: We can satisfy all readers, so switch them over all at once */
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.BranchDepth = 0;
+
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
+ unsigned int refmask = 0;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+ refmask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ if ((refmask & s.MovMask) == refmask)
+ inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
+ }
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
+ inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
+ s.MovMask &= ~inst->U.I.DstReg.WriteMask;
+ }
+ }
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0)
+ break; /* no more readers after this point */
+ }
+ }
+ }
+
+ /* Finally, remove the original MOV instruction */
+ rc_remove_instruction(inst_mov);
+}
+
+/**
+ * Check if a source register is actually always the same
+ * swizzle constant.
+ */
+static int is_src_uniform_constant(struct rc_src_register src,
+ rc_swizzle * pswz, unsigned int * pnegate)
+{
+ int have_used = 0;
+
+ if (src.File != RC_FILE_NONE) {
+ *pswz = 0;
+ return 0;
+ }
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(src.Swizzle, chan);
+ if (swz < 4) {
+ *pswz = 0;
+ return 0;
+ }
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+
+ if (!have_used) {
+ *pswz = swz;
+ *pnegate = GET_BIT(src.Negate, chan);
+ have_used = 1;
+ } else {
+ if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
+ *pswz = 0;
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+
+static void constant_folding_mad(struct rc_instruction * inst)
+{
+ rc_swizzle swz;
+ unsigned int negate;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MUL;
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ if (negate)
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ }
+ }
+}
+
+static void constant_folding_mul(struct rc_instruction * inst)
+{
+ rc_swizzle swz;
+ unsigned int negate;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ return;
+ }
+ }
+}
+
+static void constant_folding_add(struct rc_instruction * inst)
+{
+ rc_swizzle swz;
+ unsigned int negate;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ return;
+ }
+ }
+}
+
+
+/**
+ * Replace 0.0, 1.0 and 0.5 immediate constants by their
+ * respective swizzles. Simplify instructions like ADD dst, src, 0;
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+ inst->U.I.SrcReg[src].RelAddr ||
+ inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+ continue;
+
+ struct rc_constant * constant =
+ &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+ if (constant->Type != RC_CONSTANT_IMMEDIATE)
+ continue;
+
+ struct rc_src_register newsrc = inst->U.I.SrcReg[src];
+ int have_real_reference = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+ if (swz >= 4)
+ continue;
+
+ unsigned int newswz;
+ float imm = constant->u.Immediate[swz];
+ float baseimm = imm;
+ if (imm < 0.0)
+ baseimm = -baseimm;
+
+ if (baseimm == 0.0) {
+ newswz = RC_SWIZZLE_ZERO;
+ } else if (baseimm == 1.0) {
+ newswz = RC_SWIZZLE_ONE;
+ } else if (baseimm == 0.5) {
+ newswz = RC_SWIZZLE_HALF;
+ } else {
+ have_real_reference = 1;
+ continue;
+ }
+
+ SET_SWZ(newsrc.Swizzle, chan, newswz);
+ if (imm < 0.0 && !newsrc.Abs)
+ newsrc.Negate ^= 1 << chan;
+ }
+
+ if (!have_real_reference) {
+ newsrc.File = RC_FILE_NONE;
+ newsrc.Index = 0;
+ }
+
+ /* don't make the swizzle worse */
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+ c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+ continue;
+
+ inst->U.I.SrcReg[src] = newsrc;
+ }
+
+ /* Simplify instructions based on constants */
+ if (inst->U.I.Opcode == RC_OPCODE_MAD)
+ constant_folding_mad(inst);
+
+ /* note: MAD can simplify to MUL or ADD */
+ if (inst->U.I.Opcode == RC_OPCODE_MUL)
+ constant_folding_mul(inst);
+ else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+ constant_folding_add(inst);
+}
+
+void rc_optimize(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst = c->Program.Instructions.Next;
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * cur = inst;
+ inst = inst->Next;
+
+ constant_folding(c, cur);
+
+ if (cur->U.I.Opcode == RC_OPCODE_MOV) {
+ peephole(c, cur);
+ /* cur may no longer be part of the program */
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
new file mode 100644
index 0000000000..8a912da461
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct live_intervals {
+ int Start;
+ int End;
+ struct live_intervals * Next;
+};
+
+struct register_info {
+ struct live_intervals Live;
+
+ unsigned int Used:1;
+ unsigned int Allocated:1;
+ unsigned int File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct hardware_register {
+ struct live_intervals * Used;
+};
+
+struct regalloc_state {
+ struct radeon_compiler * C;
+
+ struct register_info Input[RC_REGISTER_MAX_INDEX];
+ struct register_info Temporary[RC_REGISTER_MAX_INDEX];
+
+ struct hardware_register * HwTemporary;
+ unsigned int NumHwTemporaries;
+};
+
+static void print_live_intervals(struct live_intervals * src)
+{
+ if (!src) {
+ DBG("(null)");
+ return;
+ }
+
+ while(src) {
+ DBG("(%i,%i)", src->Start, src->End);
+ src = src->Next;
+ }
+}
+
+static void add_live_intervals(struct regalloc_state * s,
+ struct live_intervals ** dst, struct live_intervals * src)
+{
+ struct live_intervals ** dst_backup = dst;
+
+ if (VERBOSE) {
+ DBG("add_live_intervals: ");
+ print_live_intervals(*dst);
+ DBG(" to ");
+ print_live_intervals(src);
+ DBG("\n");
+ }
+
+ while(src) {
+ if (*dst && (*dst)->End < src->Start) {
+ dst = &(*dst)->Next;
+ } else if (!*dst || (*dst)->Start > src->End) {
+ struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
+ li->Start = src->Start;
+ li->End = src->End;
+ li->Next = *dst;
+ *dst = li;
+ src = src->Next;
+ } else {
+ if (src->End > (*dst)->End)
+ (*dst)->End = src->End;
+ if (src->Start < (*dst)->Start)
+ (*dst)->Start = src->Start;
+ src = src->Next;
+ }
+ }
+
+ if (VERBOSE) {
+ DBG(" result: ");
+ print_live_intervals(*dst_backup);
+ DBG("\n");
+ }
+}
+
+static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
+{
+ if (VERBOSE) {
+ DBG("overlap_live_intervals: ");
+ print_live_intervals(dst);
+ DBG(" to ");
+ print_live_intervals(src);
+ DBG("\n");
+ }
+
+ while(src && dst) {
+ if (dst->End <= src->Start) {
+ dst = dst->Next;
+ } else if (dst->End <= src->End) {
+ DBG(" overlap\n");
+ return 1;
+ } else if (dst->Start < src->End) {
+ DBG(" overlap\n");
+ return 1;
+ } else {
+ src = src->Next;
+ }
+ }
+
+ DBG(" no overlap\n");
+
+ return 0;
+}
+
+static int try_add_live_intervals(struct regalloc_state * s,
+ struct live_intervals ** dst, struct live_intervals * src)
+{
+ if (overlap_live_intervals(*dst, src))
+ return 0;
+
+ add_live_intervals(s, dst, src);
+ return 1;
+}
+
+static void scan_callback(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct regalloc_state * s = data;
+ struct register_info * reg;
+
+ if (file == RC_FILE_TEMPORARY)
+ reg = &s->Temporary[index];
+ else if (file == RC_FILE_INPUT)
+ reg = &s->Input[index];
+ else
+ return;
+
+ if (!reg->Used) {
+ reg->Used = 1;
+ if (file == RC_FILE_INPUT)
+ reg->Live.Start = -1;
+ else
+ reg->Live.Start = inst->IP;
+ reg->Live.End = inst->IP;
+ } else {
+ if (inst->IP > reg->Live.End)
+ reg->Live.End = inst->IP;
+ }
+}
+
+static void compute_live_intervals(struct regalloc_state * s)
+{
+ rc_recompute_ips(s->C);
+
+ for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads_mask(inst, scan_callback, s);
+ rc_for_all_writes_mask(inst, scan_callback, s);
+ }
+}
+
+static void remap_register(void * data, struct rc_instruction * inst,
+ rc_register_file * file, unsigned int * index)
+{
+ struct regalloc_state * s = data;
+ const struct register_info * reg;
+
+ if (*file == RC_FILE_TEMPORARY)
+ reg = &s->Temporary[*index];
+ else if (*file == RC_FILE_INPUT)
+ reg = &s->Input[*index];
+ else
+ return;
+
+ if (reg->Allocated) {
+ *file = reg->File;
+ *index = reg->Index;
+ }
+}
+
+static void do_regalloc(struct regalloc_state * s)
+{
+ /* Simple and stupid greedy register allocation */
+ for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+ struct register_info * reg = &s->Temporary[index];
+
+ if (!reg->Used)
+ continue;
+
+ for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
+ if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
+ reg->Allocated = 1;
+ reg->File = RC_FILE_TEMPORARY;
+ reg->Index = hwreg;
+ goto success;
+ }
+ }
+
+ rc_error(s->C, "Ran out of hardware temporaries\n");
+ return;
+
+ success:;
+ }
+
+ /* Rewrite all instructions based on the translation table we built */
+ for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_remap_registers(inst, &remap_register, s);
+ }
+}
+
+static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
+{
+ struct regalloc_state * s = data;
+
+ if (!s->Input[input].Used)
+ return;
+
+ add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
+
+ s->Input[input].Allocated = 1;
+ s->Input[input].File = RC_FILE_TEMPORARY;
+ s->Input[input].Index = hwreg;
+
+}
+
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps)
+{
+ struct regalloc_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
+ s.NumHwTemporaries = maxtemps;
+ s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register));
+ memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register));
+
+ compute_live_intervals(&s);
+
+ c->AllocateHwInputs(c, &alloc_input, &s);
+
+ do_regalloc(&s);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
new file mode 100644
index 0000000000..a279549ff8
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct schedule_instruction {
+ struct rc_instruction * Instruction;
+
+ /** Next instruction in the linked list of ready instructions. */
+ struct schedule_instruction *NextReady;
+
+ /** Values that this instruction reads and writes */
+ struct reg_value * WriteValues[4];
+ struct reg_value * ReadValues[12];
+ unsigned int NumWriteValues:3;
+ unsigned int NumReadValues:4;
+
+ /**
+ * Number of (read and write) dependencies that must be resolved before
+ * this instruction can be scheduled.
+ */
+ unsigned int NumDependencies:5;
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ */
+struct reg_value_reader {
+ struct schedule_instruction *Reader;
+ struct reg_value_reader *Next;
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * RC_FILE_TEMPORARY.
+ */
+struct reg_value {
+ struct schedule_instruction * Writer;
+
+ /**
+ * Unordered linked list of instructions that read from this value.
+ * When this value becomes available, we increase all readers'
+ * dependency count.
+ */
+ struct reg_value_reader *Readers;
+
+ /**
+ * Number of readers of this value. This is decremented each time
+ * a reader of the value is committed.
+ * When the reader cound reaches zero, the dependency count
+ * of the instruction writing \ref Next is decremented.
+ */
+ unsigned int NumReaders;
+
+ struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
+};
+
+struct register_state {
+ struct reg_value * Values[4];
+};
+
+struct schedule_state {
+ struct radeon_compiler * C;
+ struct schedule_instruction * Current;
+
+ struct register_state Temporary[RC_REGISTER_MAX_INDEX];
+
+ /**
+ * Linked lists of instructions that can be scheduled right now,
+ * based on which ALU/TEX resources they require.
+ */
+ /*@{*/
+ struct schedule_instruction *ReadyFullALU;
+ struct schedule_instruction *ReadyRGB;
+ struct schedule_instruction *ReadyAlpha;
+ struct schedule_instruction *ReadyTEX;
+ /*@}*/
+};
+
+static struct reg_value ** get_reg_valuep(struct schedule_state * s,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ if (file != RC_FILE_TEMPORARY)
+ return 0;
+
+ if (index >= RC_REGISTER_MAX_INDEX) {
+ rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
+ return 0;
+ }
+
+ return &s->Temporary[index].Values[chan];
+}
+
+static struct reg_value * get_reg_value(struct schedule_state * s,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+ if (!pv)
+ return 0;
+ return *pv;
+}
+
+static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
+{
+ inst->NextReady = *list;
+ *list = inst;
+}
+
+static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ DBG("%i is now ready\n", sinst->Instruction->IP);
+
+ if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
+ add_inst_to_list(&s->ReadyTEX, sinst);
+ else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
+ add_inst_to_list(&s->ReadyRGB, sinst);
+ else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
+ add_inst_to_list(&s->ReadyAlpha, sinst);
+ else
+ add_inst_to_list(&s->ReadyFullALU, sinst);
+}
+
+static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ assert(sinst->NumDependencies > 0);
+ sinst->NumDependencies--;
+ if (!sinst->NumDependencies)
+ instruction_ready(s, sinst);
+}
+
+static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ DBG("%i: commit\n", sinst->Instruction->IP);
+
+ for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
+ struct reg_value * v = sinst->ReadValues[i];
+ assert(v->NumReaders > 0);
+ v->NumReaders--;
+ if (!v->NumReaders) {
+ if (v->Next)
+ decrease_dependencies(s, v->Next->Writer);
+ }
+ }
+
+ for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
+ struct reg_value * v = sinst->WriteValues[i];
+ if (v->NumReaders) {
+ for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
+ decrease_dependencies(s, r->Reader);
+ }
+ } else {
+ /* This happens in instruction sequences of the type
+ * OP r.x, ...;
+ * OP r.x, r.x, ...;
+ * See also the subtlety in how instructions that both
+ * read and write the same register are scanned.
+ */
+ if (v->Next)
+ decrease_dependencies(s, v->Next->Writer);
+ }
+ }
+}
+
+/**
+ * Emit all ready texture instructions in a single block.
+ *
+ * Emit as a single block to (hopefully) sample many textures in parallel,
+ * and to avoid hardware indirections on R300.
+ */
+static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
+{
+ struct schedule_instruction *readytex;
+
+ assert(s->ReadyTEX);
+
+ /* Don't let the ready list change under us! */
+ readytex = s->ReadyTEX;
+ s->ReadyTEX = 0;
+
+ /* Node marker for R300 */
+ struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev);
+ inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
+
+ /* Link texture instructions back in */
+ while(readytex) {
+ struct schedule_instruction * tex = readytex;
+ readytex = readytex->NextReady;
+
+ rc_insert_instruction(before->Prev, tex->Instruction);
+ commit_instruction(s, tex);
+ }
+}
+
+
+static int destructive_merge_instructions(
+ struct rc_pair_instruction * rgb,
+ struct rc_pair_instruction * alpha)
+{
+ assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
+ assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
+
+ /* Copy alpha args into rgb */
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
+ rc_register_file file = 0;
+ unsigned int index = 0;
+
+ if (alpha->Alpha.Arg[arg].Swizzle < 3) {
+ srcrgb = 1;
+ file = alpha->RGB.Src[oldsrc].File;
+ index = alpha->RGB.Src[oldsrc].Index;
+ } else if (alpha->Alpha.Arg[arg].Swizzle < 4) {
+ srcalpha = 1;
+ file = alpha->Alpha.Src[oldsrc].File;
+ index = alpha->Alpha.Src[oldsrc].Index;
+ }
+
+ int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+ if (source < 0)
+ return 0;
+
+ rgb->Alpha.Arg[arg].Source = source;
+ rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
+ rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
+ rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
+ }
+
+ /* Copy alpha opcode into rgb */
+ rgb->Alpha.Opcode = alpha->Alpha.Opcode;
+ rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
+ rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
+ rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
+ rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
+ rgb->Alpha.Saturate = alpha->Alpha.Saturate;
+
+ /* Merge ALU result writing */
+ if (alpha->WriteALUResult) {
+ if (rgb->WriteALUResult)
+ return 0;
+
+ rgb->WriteALUResult = alpha->WriteALUResult;
+ rgb->ALUResultCompare = alpha->ALUResultCompare;
+ }
+
+ return 1;
+}
+
+/**
+ * Try to merge the given instructions into the rgb instructions.
+ *
+ * Return true on success; on failure, return false, and keep
+ * the instructions untouched.
+ */
+static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
+{
+ struct rc_pair_instruction backup;
+
+ memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
+
+ if (destructive_merge_instructions(rgb, alpha))
+ return 1;
+
+ memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
+ return 0;
+}
+
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ */
+static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
+{
+ struct schedule_instruction * sinst;
+
+ if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
+ if (s->ReadyFullALU) {
+ sinst = s->ReadyFullALU;
+ s->ReadyFullALU = s->ReadyFullALU->NextReady;
+ } else if (s->ReadyRGB) {
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ } else {
+ sinst = s->ReadyAlpha;
+ s->ReadyAlpha = s->ReadyAlpha->NextReady;
+ }
+
+ rc_insert_instruction(before->Prev, sinst->Instruction);
+ commit_instruction(s, sinst);
+ } else {
+ struct schedule_instruction **prgb;
+ struct schedule_instruction **palpha;
+
+ /* Some pairings might fail because they require too
+ * many source slots; try all possible pairings if necessary */
+ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+ for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+ struct schedule_instruction * psirgb = *prgb;
+ struct schedule_instruction * psialpha = *palpha;
+
+ if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
+ continue;
+
+ *prgb = (*prgb)->NextReady;
+ *palpha = (*palpha)->NextReady;
+ rc_insert_instruction(before->Prev, psirgb->Instruction);
+ commit_instruction(s, psirgb);
+ commit_instruction(s, psialpha);
+ goto success;
+ }
+ }
+
+ /* No success in pairing; just take the first RGB instruction */
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+
+ rc_insert_instruction(before->Prev, sinst->Instruction);
+ commit_instruction(s, sinst);
+ success: ;
+ }
+}
+
+static void scan_read(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct schedule_state * s = data;
+ struct reg_value * v = get_reg_value(s, file, index, chan);
+
+ if (!v)
+ return;
+
+ if (v->Writer == s->Current) {
+ /* The instruction reads and writes to a register component.
+ * In this case, we only want to increment dependencies by one.
+ */
+ return;
+ }
+
+ DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+ struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+ reader->Reader = s->Current;
+ reader->Next = v->Readers;
+ v->Readers = reader;
+ v->NumReaders++;
+
+ s->Current->NumDependencies++;
+
+ if (s->Current->NumReadValues >= 12) {
+ rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
+ } else {
+ s->Current->ReadValues[s->Current->NumReadValues++] = v;
+ }
+}
+
+static void scan_write(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct schedule_state * s = data;
+ struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+
+ if (!pv)
+ return;
+
+ DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+ struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
+ memset(newv, 0, sizeof(*newv));
+
+ newv->Writer = s->Current;
+
+ if (*pv) {
+ (*pv)->Next = newv;
+ s->Current->NumDependencies++;
+ }
+
+ *pv = newv;
+
+ if (s->Current->NumWriteValues >= 4) {
+ rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
+ } else {
+ s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
+ }
+}
+
+static void schedule_block(struct r300_fragment_program_compiler * c,
+ struct rc_instruction * begin, struct rc_instruction * end)
+{
+ struct schedule_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
+
+ /* Scan instructions for data dependencies */
+ unsigned int ip = 0;
+ for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+ s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
+ memset(s.Current, 0, sizeof(struct schedule_instruction));
+
+ s.Current->Instruction = inst;
+ inst->IP = ip++;
+
+ DBG("%i: Scanning\n", inst->IP);
+
+ /* The order of things here is subtle and maybe slightly
+ * counter-intuitive, to account for the case where an
+ * instruction writes to the same register as it reads
+ * from. */
+ rc_for_all_writes_chan(inst, &scan_write, &s);
+ rc_for_all_reads_chan(inst, &scan_read, &s);
+
+ DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
+
+ if (!s.Current->NumDependencies)
+ instruction_ready(&s, s.Current);
+ }
+
+ /* Temporarily unlink all instructions */
+ begin->Prev->Next = end;
+ end->Prev = begin->Prev;
+
+ /* Schedule instructions back */
+ while(!s.C->Error &&
+ (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+ if (s.ReadyTEX)
+ emit_all_tex(&s, end);
+
+ while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
+ emit_one_alu(&s, end);
+ }
+}
+
+static int is_controlflow(struct rc_instruction * inst)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ return opcode->IsFlowControl;
+ }
+ return 0;
+}
+
+void rc_pair_schedule(struct r300_fragment_program_compiler *c)
+{
+ struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+ while(inst != &c->Base.Program.Instructions) {
+ if (is_controlflow(inst)) {
+ inst = inst->Next;
+ continue;
+ }
+
+ struct rc_instruction * first = inst;
+
+ while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
+ inst = inst->Next;
+
+ DBG("Schedule one block\n");
+ schedule_block(c, first, inst);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
new file mode 100644
index 0000000000..407a0a55ee
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler.h"
+
+
+/**
+ * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
+ * and reverse the order of arguments for CMP.
+ */
+static void final_rewrite(struct rc_sub_instruction *inst)
+{
+ struct rc_src_register tmp;
+
+ switch(inst->Opcode) {
+ case RC_OPCODE_ADD:
+ inst->SrcReg[2] = inst->SrcReg[1];
+ inst->SrcReg[1].File = RC_FILE_NONE;
+ inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+ inst->SrcReg[1].Negate = RC_MASK_NONE;
+ inst->Opcode = RC_OPCODE_MAD;
+ break;
+ case RC_OPCODE_CMP:
+ tmp = inst->SrcReg[2];
+ inst->SrcReg[2] = inst->SrcReg[0];
+ inst->SrcReg[0] = tmp;
+ break;
+ case RC_OPCODE_MOV:
+ /* AMD say we should use CMP.
+ * However, when we transform
+ * KIL -r0;
+ * into
+ * CMP tmp, -r0, -r0, 0;
+ * KIL tmp;
+ * we get incorrect behaviour on R500 when r0 == 0.0.
+ * It appears that the R500 KIL hardware treats -0.0 as less
+ * than zero.
+ */
+ inst->SrcReg[1].File = RC_FILE_NONE;
+ inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+ inst->SrcReg[2].File = RC_FILE_NONE;
+ inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+ inst->Opcode = RC_OPCODE_MAD;
+ break;
+ case RC_OPCODE_MUL:
+ inst->SrcReg[2].File = RC_FILE_NONE;
+ inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+ inst->Opcode = RC_OPCODE_MAD;
+ break;
+ default:
+ /* nothing to do */
+ break;
+ }
+}
+
+
+/**
+ * Classify an instruction according to which ALUs etc. it needs
+ */
+static void classify_instruction(struct rc_sub_instruction * inst,
+ int * needrgb, int * needalpha, int * istranscendent)
+{
+ *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
+ *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
+ *istranscendent = 0;
+
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ *needrgb = 1;
+ else if (inst->WriteALUResult == RC_ALURESULT_W)
+ *needalpha = 1;
+
+ switch(inst->Opcode) {
+ case RC_OPCODE_ADD:
+ case RC_OPCODE_CMP:
+ case RC_OPCODE_DDX:
+ case RC_OPCODE_DDY:
+ case RC_OPCODE_FRC:
+ case RC_OPCODE_MAD:
+ case RC_OPCODE_MAX:
+ case RC_OPCODE_MIN:
+ case RC_OPCODE_MOV:
+ case RC_OPCODE_MUL:
+ break;
+ case RC_OPCODE_COS:
+ case RC_OPCODE_EX2:
+ case RC_OPCODE_LG2:
+ case RC_OPCODE_RCP:
+ case RC_OPCODE_RSQ:
+ case RC_OPCODE_SIN:
+ *istranscendent = 1;
+ *needalpha = 1;
+ break;
+ case RC_OPCODE_DP4:
+ *needalpha = 1;
+ /* fall through */
+ case RC_OPCODE_DP3:
+ *needrgb = 1;
+ break;
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ */
+static void set_pair_instruction(struct r300_fragment_program_compiler *c,
+ struct rc_pair_instruction * pair,
+ struct rc_sub_instruction * inst)
+{
+ memset(pair, 0, sizeof(struct rc_pair_instruction));
+
+ int needrgb, needalpha, istranscendent;
+ classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
+
+ if (needrgb) {
+ if (istranscendent)
+ pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
+ else
+ pair->RGB.Opcode = inst->Opcode;
+ if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+ pair->RGB.Saturate = 1;
+ }
+ if (needalpha) {
+ pair->Alpha.Opcode = inst->Opcode;
+ if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+ pair->Alpha.Saturate = 1;
+ }
+
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+ int i;
+
+ for(i = 0; i < opcode->NumSrcRegs; ++i) {
+ int source;
+ if (needrgb && !istranscendent) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ int j;
+ for(j = 0; j < 3; ++j) {
+ unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+ if (swz < 3)
+ srcrgb = 1;
+ else if (swz < 4)
+ srcalpha = 1;
+ }
+ source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+ inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ pair->RGB.Arg[i].Source = source;
+ pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
+ pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+ }
+ if (needalpha) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
+ if (swz < 3)
+ srcrgb = 1;
+ else if (swz < 4)
+ srcalpha = 1;
+ source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+ inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ pair->Alpha.Arg[i].Source = source;
+ pair->Alpha.Arg[i].Swizzle = swz;
+ pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+ }
+ }
+
+ /* Destination handling */
+ if (inst->DstReg.File == RC_FILE_OUTPUT) {
+ if (inst->DstReg.Index == c->OutputDepth) {
+ pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ } else {
+ for (i = 0; i < 4; i++) {
+ if (inst->DstReg.Index == c->OutputColor[i]) {
+ pair->RGB.Target = i;
+ pair->Alpha.Target = i;
+ pair->RGB.OutputWriteMask |=
+ inst->DstReg.WriteMask & RC_MASK_XYZ;
+ pair->Alpha.OutputWriteMask |=
+ GET_BIT(inst->DstReg.WriteMask, 3);
+ break;
+ }
+ }
+ }
+ } else {
+ if (needrgb) {
+ pair->RGB.DestIndex = inst->DstReg.Index;
+ pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+ }
+ if (needalpha) {
+ pair->Alpha.DestIndex = inst->DstReg.Index;
+ pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ }
+ }
+
+ if (inst->WriteALUResult) {
+ pair->WriteALUResult = inst->WriteALUResult;
+ pair->ALUResultCompare = inst->ALUResultCompare;
+ }
+}
+
+
+/**
+ * Translate all ALU instructions into corresponding pair instructions,
+ * performing no other changes.
+ */
+void rc_pair_translate(struct r300_fragment_program_compiler *c)
+{
+ for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+ inst != &c->Base.Program.Instructions;
+ inst = inst->Next) {
+ if (inst->Type != RC_INSTRUCTION_NORMAL)
+ continue;
+
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
+ continue;
+
+ struct rc_sub_instruction copy = inst->U.I;
+
+ final_rewrite(&copy);
+ inst->Type = RC_INSTRUCTION_PAIR;
+ set_pair_instruction(c, &inst->U.P, &copy);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
new file mode 100644
index 0000000000..a3c41d7bd4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+
+
+/**
+ * Transform the given clause in the following way:
+ * 1. Replace it with an empty clause
+ * 2. For every instruction in the original clause, try the given
+ * transformations in order.
+ * 3. If one of the transformations returns GL_TRUE, assume that it
+ * has emitted the appropriate instruction(s) into the new clause;
+ * otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void radeonLocalTransform(
+ struct radeon_compiler * c,
+ int num_transformations,
+ struct radeon_program_transformation* transformations)
+{
+ struct rc_instruction * inst = c->Program.Instructions.Next;
+
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * current = inst;
+ int i;
+
+ inst = inst->Next;
+
+ for(i = 0; i < num_transformations; ++i) {
+ struct radeon_program_transformation* t = transformations + i;
+
+ if (t->function(c, current, t->userData))
+ break;
+ }
+ }
+}
+
+/**
+ * Left multiplication of a register with a swizzle
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+ struct rc_src_register tmp = srcreg;
+ int i;
+ tmp.Swizzle = 0;
+ tmp.Negate = 0;
+ for(i = 0; i < 4; ++i) {
+ rc_swizzle swz = GET_SWZ(swizzle, i);
+ if (swz < 4) {
+ tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+ tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
+ } else {
+ tmp.Swizzle |= swz << (i*3);
+ }
+ }
+ return tmp;
+}
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
+{
+ char used[RC_REGISTER_MAX_INDEX];
+ unsigned int i;
+ struct rc_instruction * rcinst;
+
+ memset(used, 0, sizeof(used));
+
+ for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
+ const struct rc_sub_instruction *inst = &rcinst->U.I;
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
+ unsigned int k;
+
+ for (k = 0; k < opcode->NumSrcRegs; k++) {
+ if (inst->SrcReg[k].File == RC_FILE_TEMPORARY)
+ used[inst->SrcReg[k].Index] = 1;
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->DstReg.File == RC_FILE_TEMPORARY)
+ used[inst->DstReg.Index] = 1;
+ }
+ }
+
+ for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
+ if (!used[i])
+ return i;
+ }
+
+ rc_error(c, "Ran out of temporary registers\n");
+ return 0;
+}
+
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
+
+ memset(inst, 0, sizeof(struct rc_instruction));
+
+ inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;
+
+ return inst;
+}
+
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
+{
+ inst->Prev = after;
+ inst->Next = after->Next;
+
+ inst->Prev->Next = inst;
+ inst->Next->Prev = inst;
+}
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+ struct rc_instruction * inst = rc_alloc_instruction(c);
+
+ rc_insert_instruction(after, inst);
+
+ return inst;
+}
+
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+ inst->Prev->Next = inst->Next;
+ inst->Next->Prev = inst->Prev;
+}
+
+/**
+ * Return the number of instructions in the program.
+ */
+unsigned int rc_recompute_ips(struct radeon_compiler * c)
+{
+ unsigned int ip = 0;
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ inst->IP = ip++;
+ }
+
+ c->Program.Instructions.IP = 0xcafedead;
+
+ return ip;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
new file mode 100644
index 0000000000..e318867696
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_H_
+#define __RADEON_PROGRAM_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "radeon_opcodes.h"
+#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_program_pair.h"
+
+struct radeon_compiler;
+
+struct rc_src_register {
+ unsigned int File:3;
+
+ /** Negative values may be used for relative addressing. */
+ signed int Index:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int RelAddr:1;
+
+ unsigned int Swizzle:12;
+
+ /** Take the component-wise absolute value */
+ unsigned int Abs:1;
+
+ /** Post-Abs negation. */
+ unsigned int Negate:4;
+};
+
+struct rc_dst_register {
+ unsigned int File:3;
+
+ /** Negative values may be used for relative addressing. */
+ signed int Index:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int RelAddr:1;
+
+ unsigned int WriteMask:4;
+};
+
+/**
+ * Instructions are maintained by the compiler in a doubly linked list
+ * of these structures.
+ *
+ * This instruction format is intended to be expanded for hardware-specific
+ * trickery. At different stages of compilation, a different set of
+ * instruction types may be valid.
+ */
+struct rc_sub_instruction {
+ struct rc_src_register SrcReg[3];
+ struct rc_dst_register DstReg;
+
+ /**
+ * Opcode of this instruction, according to \ref rc_opcode enums.
+ */
+ unsigned int Opcode:8;
+
+ /**
+ * Saturate each value of the result to the range [0,1] or [-1,1],
+ * according to \ref rc_saturate_mode enums.
+ */
+ unsigned int SaturateMode:2;
+
+ /**
+ * Writing to the special register RC_SPECIAL_ALU_RESULT
+ */
+ /*@{*/
+ unsigned int WriteALUResult:2;
+ unsigned int ALUResultCompare:3;
+ /*@}*/
+
+ /**
+ * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
+ */
+ /*@{*/
+ /** Source texture unit. */
+ unsigned int TexSrcUnit:5;
+
+ /** Source texture target, one of the \ref rc_texture_target enums */
+ unsigned int TexSrcTarget:3;
+
+ /** True if tex instruction should do shadow comparison */
+ unsigned int TexShadow:1;
+ /*@}*/
+};
+
+typedef enum {
+ RC_INSTRUCTION_NORMAL = 0,
+ RC_INSTRUCTION_PAIR
+} rc_instruction_type;
+
+struct rc_instruction {
+ struct rc_instruction * Prev;
+ struct rc_instruction * Next;
+
+ rc_instruction_type Type;
+ union {
+ struct rc_sub_instruction I;
+ struct rc_pair_instruction P;
+ } U;
+
+ /**
+ * Warning: IPs are not stable. If you want to use them,
+ * you need to recompute them at the beginning of each pass
+ * using \ref rc_recompute_ips
+ */
+ unsigned int IP;
+};
+
+struct rc_program {
+ /**
+ * Instructions.Next points to the first instruction,
+ * Instructions.Prev points to the last instruction.
+ */
+ struct rc_instruction Instructions;
+
+ /* Long term, we should probably remove InputsRead & OutputsWritten,
+ * since updating dependent state can be fragile, and they aren't
+ * actually used very often. */
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+ uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+ struct rc_constant_list Constants;
+};
+
+enum {
+ OPCODE_REPL_ALPHA = MAX_RC_OPCODE /**< used in paired instructions */
+};
+
+
+static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
+{
+ if (idx & 0x4)
+ return idx;
+ return GET_SWZ(swz, idx);
+}
+
+static inline unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, swz_x);
+ ret |= get_swz(src, swz_y) << 3;
+ ret |= get_swz(src, swz_z) << 6;
+ ret |= get_swz(src, swz_w) << 9;
+
+ return ret;
+}
+
+static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
+
+ return ret;
+}
+
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+static inline void reset_srcreg(struct rc_src_register* reg)
+{
+ memset(reg, 0, sizeof(struct rc_src_register));
+ reg->Swizzle = RC_SWIZZLE_XYZW;
+}
+
+
+/**
+ * A transformation that can be passed to \ref radeonLocalTransform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return true, or return false if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ */
+struct radeon_program_transformation {
+ int (*function)(
+ struct radeon_compiler*,
+ struct rc_instruction*,
+ void*);
+ void *userData;
+};
+
+void radeonLocalTransform(
+ struct radeon_compiler *c,
+ int num_transformations,
+ struct radeon_program_transformation* transformations);
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c);
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
+void rc_remove_instruction(struct rc_instruction * inst);
+
+unsigned int rc_recompute_ips(struct radeon_compiler * c);
+
+void rc_print_program(const struct rc_program *prog);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
new file mode 100644
index 0000000000..c922d3d9a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -0,0 +1,975 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Shareable transformations that transform "special" ALU instructions
+ * into ALU instructions that are supported by hardware.
+ *
+ */
+
+#include "radeon_program_alu.h"
+
+#include "radeon_compiler.h"
+
+
+static struct rc_instruction *emit1(
+ struct radeon_compiler * c, struct rc_instruction * after,
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg)
+{
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg;
+ return fpi;
+}
+
+static struct rc_instruction *emit2(
+ struct radeon_compiler * c, struct rc_instruction * after,
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
+{
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg0;
+ fpi->U.I.SrcReg[1] = SrcReg1;
+ return fpi;
+}
+
+static struct rc_instruction *emit3(
+ struct radeon_compiler * c, struct rc_instruction * after,
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
+ struct rc_src_register SrcReg2)
+{
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg0;
+ fpi->U.I.SrcReg[1] = SrcReg1;
+ fpi->U.I.SrcReg[2] = SrcReg2;
+ return fpi;
+}
+
+static struct rc_dst_register dstreg(int file, int index)
+{
+ struct rc_dst_register dst;
+ dst.File = file;
+ dst.Index = index;
+ dst.WriteMask = RC_MASK_XYZW;
+ dst.RelAddr = 0;
+ return dst;
+}
+
+static struct rc_dst_register dstregtmpmask(int index, int mask)
+{
+ struct rc_dst_register dst = {0};
+ dst.File = RC_FILE_TEMPORARY;
+ dst.Index = index;
+ dst.WriteMask = mask;
+ dst.RelAddr = 0;
+ return dst;
+}
+
+static const struct rc_src_register builtin_zero = {
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_0000
+};
+static const struct rc_src_register builtin_one = {
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_1111
+};
+static const struct rc_src_register srcreg_undefined = {
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_XYZW
+};
+
+static struct rc_src_register srcreg(int file, int index)
+{
+ struct rc_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ return src;
+}
+
+static struct rc_src_register srcregswz(int file, int index, int swz)
+{
+ struct rc_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ src.Swizzle = swz;
+ return src;
+}
+
+static struct rc_src_register absolute(struct rc_src_register reg)
+{
+ struct rc_src_register newreg = reg;
+ newreg.Abs = 1;
+ newreg.Negate = RC_MASK_NONE;
+ return newreg;
+}
+
+static struct rc_src_register negate(struct rc_src_register reg)
+{
+ struct rc_src_register newreg = reg;
+ newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
+ return newreg;
+}
+
+static struct rc_src_register swizzle(struct rc_src_register reg,
+ rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
+{
+ struct rc_src_register swizzled = reg;
+ swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
+ return swizzled;
+}
+
+static struct rc_src_register swizzle_smear(struct rc_src_register reg,
+ rc_swizzle x)
+{
+ return swizzle(reg, x, x, x, x);
+}
+
+static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_X);
+}
+
+static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_Y);
+}
+
+static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_Z);
+}
+
+static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_W);
+}
+
+static void transform_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_src_register src = inst->U.I.SrcReg[0];
+ src.Abs = 1;
+ src.Negate = RC_MASK_NONE;
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
+ rc_remove_instruction(inst);
+}
+
+static void transform_CEIL(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Assuming:
+ * ceil(x) = -floor(-x)
+ *
+ * After inlining floor:
+ * ceil(x) = -(-x-frac(-x))
+ *
+ * After simplification:
+ * ceil(x) = x+frac(-x)
+ */
+
+ int tempreg = rc_find_free_temporary(c);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg));
+ rc_remove_instruction(inst);
+}
+
+static void transform_DP3(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ struct rc_src_register src1 = inst->U.I.SrcReg[1];
+ src0.Negate &= ~RC_MASK_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~RC_MASK_W;
+ src1.Swizzle &= ~(7 << (3 * 3));
+ src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ rc_remove_instruction(inst);
+}
+
+static void transform_DPH(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ src0.Negate &= ~RC_MASK_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
+ rc_remove_instruction(inst);
+}
+
+/**
+ * [1, src0.y*src1.y, src0.z, src1.w]
+ * So basically MUL with lotsa swizzling.
+ */
+static void transform_DST(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
+ rc_remove_instruction(inst);
+}
+
+static void transform_FLR(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ rc_remove_instruction(inst);
+}
+
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */
+static void transform_LIT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ unsigned int constant;
+ unsigned int constant_swizzle;
+ unsigned int temp;
+ struct rc_src_register srctemp;
+
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
+
+ if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
+ struct rc_instruction * inst_mov;
+
+ inst_mov = emit1(c, inst,
+ RC_OPCODE_MOV, 0, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
+
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ }
+
+ temp = inst->U.I.DstReg.Index;
+ srctemp = srcreg(RC_FILE_TEMPORARY, temp);
+
+ /* tmp.x = max(0.0, Src.x); */
+ /* tmp.y = max(0.0, Src.y); */
+ /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ dstregtmpmask(temp, RC_MASK_XYW),
+ inst->U.I.SrcReg[0],
+ swizzle(srcreg(RC_FILE_CONSTANT, constant),
+ RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
+ emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
+ dstregtmpmask(temp, RC_MASK_Z),
+ swizzle_wwww(srctemp),
+ negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
+
+ /* tmp.w = Pow(tmp.y, tmp.w) */
+ emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle_yyyy(srctemp));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle_wwww(srctemp),
+ swizzle_zzzz(srctemp));
+ emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle_wwww(srctemp));
+
+ /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+ dstregtmpmask(temp, RC_MASK_Z),
+ negate(swizzle_xxxx(srctemp)),
+ swizzle_wwww(srctemp),
+ builtin_zero);
+
+ /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+ dstregtmpmask(temp, RC_MASK_XYW),
+ swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_LRP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg),
+ inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_POW(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg);
+ struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg);
+ tempdst.WriteMask = RC_MASK_W;
+ tempsrc.Swizzle = RC_SWIZZLE_WWWW;
+
+ emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
+ emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_RSQ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
+}
+
+static void transform_SEQ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SFL(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+ rc_remove_instruction(inst);
+}
+
+static void transform_SGE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SGT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SLE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SLT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SNE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SUB(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
+}
+
+static void transform_SWZ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+}
+
+static void transform_XPD(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+ negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+
+ rc_remove_instruction(inst);
+}
+
+
+/**
+ * Can be used as a transformation for @ref radeonClauseLocalTransform,
+ * no userData necessary.
+ *
+ * Eliminates the following ALU instructions:
+ * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
+ * using:
+ * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
+ *
+ * Transforms RSQ to Radeon's native RSQ by explicitly setting
+ * absolute value.
+ *
+ * @note should be applicable to R300 and R500 fragment programs.
+ */
+int radeonTransformALU(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
+ case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+ case RC_OPCODE_DST: transform_DST(c, inst); return 1;
+ case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
+ case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+ case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+ case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
+ case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
+ case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+ case RC_OPCODE_SGE: transform_SGE(c, inst); return 1;
+ case RC_OPCODE_SGT: transform_SGT(c, inst); return 1;
+ case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
+ case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
+ case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
+ case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+ case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
+ default:
+ return 0;
+ }
+}
+
+
+static void transform_r300_vertex_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Note: r500 can take absolute values, but r300 cannot. */
+ inst->U.I.Opcode = RC_OPCODE_MAX;
+ inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* There is no decent CMP available, so let's rig one up.
+ * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+ * The following sequence consumes two temps and two extra slots
+ * (the second temp and the second slot is consumed by transform_LRP),
+ * but should be equivalent:
+ *
+ * SLT tmp0, src0, 0.0
+ * LRP dst, tmp0, src1, src2
+ *
+ * Yes, I know, I'm a mad scientist. ~ C. & M. */
+ int tempreg0 = rc_find_free_temporary(c);
+
+ /* SLT tmp0, src0, 0.0 */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg0),
+ inst->U.I.SrcReg[0], builtin_zero);
+
+ /* LRP dst, tmp0, src1, src2 */
+ transform_LRP(c,
+ emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+ unsigned constant_swizzle;
+ int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
+ 0.0000000000000000001,
+ &constant_swizzle);
+
+ /* MOV dst, src */
+ emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg),
+ inst->U.I.SrcReg[0]);
+
+ /* MAX dst.z, src, 0.00...001 */
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ dstregtmpmask(tempreg, RC_MASK_Y),
+ srcreg(RC_FILE_TEMPORARY, tempreg),
+ srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+
+ inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
+}
+
+static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* x = y <==> x >= y && y >= x */
+ int tmp = rc_find_free_temporary(c);
+
+ /* x <= y */
+ emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+ dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ inst->U.I.SrcReg[1]);
+
+ /* y <= x */
+ emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[1],
+ inst->U.I.SrcReg[0]);
+
+ /* x && y = x * y */
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp),
+ srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SNE(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* x != y <==> x < y || y < x */
+ int tmp = rc_find_free_temporary(c);
+
+ /* x < y */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ inst->U.I.SrcReg[1]);
+
+ /* y < x */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[1],
+ inst->U.I.SrcReg[0]);
+
+ /* x || y = max(x, y) */
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp),
+ srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SGT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* x > y <==> -x < -y */
+ inst->U.I.Opcode = RC_OPCODE_SLT;
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_SLE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* x <= y <==> -x >= -y */
+ inst->U.I.Opcode = RC_OPCODE_SGE;
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+/**
+ * For use with radeonLocalTransform, this transforms non-native ALU
+ * instructions of the r300 up to r500 vertex engine.
+ */
+int r300_transform_vertex_alu(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+ case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
+ case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
+ case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+ case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
+ case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+ case RC_OPCODE_SEQ:
+ if (!c->is_r500) {
+ transform_r300_vertex_SEQ(c, inst);
+ return 1;
+ }
+ return 0;
+ case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+ case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
+ case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;
+ case RC_OPCODE_SNE:
+ if (!c->is_r500) {
+ transform_r300_vertex_SNE(c, inst);
+ return 1;
+ }
+ return 0;
+ case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+ case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
+ default:
+ return 0;
+ }
+}
+
+static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
+{
+ static const float SinCosConsts[2][4] = {
+ {
+ 1.273239545, /* 4/PI */
+ -0.405284735, /* -4/(PI*PI) */
+ 3.141592654, /* PI */
+ 0.2225 /* weight */
+ },
+ {
+ 0.75,
+ 0.5,
+ 0.159154943, /* 1/(2*PI) */
+ 6.283185307 /* 2*PI */
+ }
+ };
+ int i;
+
+ for(i = 0; i < 2; ++i)
+ constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
+}
+
+/**
+ * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
+ *
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ */
+static void sin_approx(
+ struct radeon_compiler* c, struct rc_instruction * inst,
+ struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
+{
+ unsigned int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ swizzle_xxxx(src),
+ srcreg(RC_FILE_CONSTANT, constants[0]));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
+ swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ absolute(swizzle_xxxx(src)),
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
+ negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
+ swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+}
+
+/**
+ * Translate the trigonometric functions COS, SIN, and SCS
+ * using only the basic instructions
+ * MOV, ADD, MUL, MAD, FRC
+ */
+int radeonTransformTrigSimple(struct radeon_compiler* c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
+
+ unsigned int constants[2];
+ unsigned int tempreg = rc_find_free_temporary(c);
+
+ sincos_constants(c, constants);
+
+ if (inst->U.I.Opcode == RC_OPCODE_COS) {
+ /* MAD tmp.x, src, 1/(2*PI), 0.75 */
+ /* FRC tmp.x, tmp.x */
+ /* MAD tmp.z, tmp.x, 2*PI, -PI */
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+ swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+ negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+ sin_approx(c, inst, inst->U.I.DstReg,
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ constants);
+ } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+ swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+ negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+ sin_approx(c, inst, inst->U.I.DstReg,
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ constants);
+ } else {
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ srcreg(RC_FILE_TEMPORARY, tempreg));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ srcreg(RC_FILE_TEMPORARY, tempreg),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+ negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+ struct rc_dst_register dst = inst->U.I.DstReg;
+
+ dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
+ sin_approx(c, inst, dst,
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ constants);
+
+ dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
+ sin_approx(c, inst, dst,
+ swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ constants);
+ }
+
+ rc_remove_instruction(inst);
+
+ return 1;
+}
+
+static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ unsigned srctmp)
+{
+ if (inst->U.I.Opcode == RC_OPCODE_COS) {
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+ inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
+ struct rc_dst_register moddst = inst->U.I.DstReg;
+
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+ moddst.WriteMask = RC_MASK_X;
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ }
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+ moddst.WriteMask = RC_MASK_Y;
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ }
+ }
+
+ rc_remove_instruction(inst);
+}
+
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * to include pre-scaling by 1/(2*PI) and taking the fractional
+ * part, so that the input to COS and SIN is always in the range [0,1).
+ * SCS is replaced by one COS and one SIN instruction.
+ *
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+int radeonTransformTrigScale(struct radeon_compiler* c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
+
+ static const float RCP_2PI = 0.15915494309189535;
+ unsigned int temp;
+ unsigned int constant;
+ unsigned int constant_swizzle;
+
+ temp = rc_find_free_temporary(c);
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
+
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp));
+
+ r300_transform_SIN_COS_SCS(c, inst, temp);
+ return 1;
+}
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * so that the input to COS and SIN is always in the range [-PI, PI].
+ * SCS is replaced by one COS and one SIN instruction.
+ */
+int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ void *unused)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
+
+ /* Repeat x in the range [-PI, PI]:
+ *
+ * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
+ */
+
+ static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+ unsigned int temp;
+ unsigned int constant;
+
+ temp = rc_find_free_temporary(c);
+ constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
+
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
+
+ r300_transform_SIN_COS_SCS(c, inst, temp);
+ return 1;
+}
+
+/**
+ * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
+ * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
+ * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
+ *
+ * @warning This explicitly changes the form of DDX and DDY!
+ */
+
+int radeonTransformDeriv(struct radeon_compiler* c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
+ return 0;
+
+ inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+ inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+ return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
new file mode 100644
index 0000000000..77d444476f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_ALU_H_
+#define __RADEON_PROGRAM_ALU_H_
+
+#include "radeon_program.h"
+
+int radeonTransformALU(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+int r300_transform_vertex_alu(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+int radeonTransformTrigSimple(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+int radeonTransformTrigScale(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+int r300_transform_trig_scale_vertex(
+ struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ void*);
+
+int radeonTransformDeriv(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
new file mode 100644
index 0000000000..2ddf60b677
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+
+typedef enum {
+ RC_SATURATE_NONE = 0,
+ RC_SATURATE_ZERO_ONE,
+ RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+
+typedef enum {
+ RC_TEXTURE_2D_ARRAY,
+ RC_TEXTURE_1D_ARRAY,
+ RC_TEXTURE_CUBE,
+ RC_TEXTURE_3D,
+ RC_TEXTURE_RECT,
+ RC_TEXTURE_2D,
+ RC_TEXTURE_1D
+} rc_texture_target;
+
+typedef enum {
+ /**
+ * Used to indicate unused register descriptions and
+ * source register that use a constant swizzle.
+ */
+ RC_FILE_NONE = 0,
+ RC_FILE_TEMPORARY,
+
+ /**
+ * Input register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_INPUT,
+
+ /**
+ * Output register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_OUTPUT,
+ RC_FILE_ADDRESS,
+
+ /**
+ * Indicates a constant from the \ref rc_constant_list .
+ */
+ RC_FILE_CONSTANT,
+
+ /**
+ * Indicates a special register, see RC_SPECIAL_xxx.
+ */
+ RC_FILE_SPECIAL
+} rc_register_file;
+
+enum {
+ /** R500 fragment program ALU result "register" */
+ RC_SPECIAL_ALU_RESULT = 0,
+
+ /** Must be last */
+ RC_NUM_SPECIAL_REGISTERS
+};
+
+#define RC_REGISTER_INDEX_BITS 10
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+
+typedef enum {
+ RC_SWIZZLE_X = 0,
+ RC_SWIZZLE_Y,
+ RC_SWIZZLE_Z,
+ RC_SWIZZLE_W,
+ RC_SWIZZLE_ZERO,
+ RC_SWIZZLE_ONE,
+ RC_SWIZZLE_HALF,
+ RC_SWIZZLE_UNUSED
+} rc_swizzle;
+
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
+#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
+#define SET_SWZ(swz, idx, newv) \
+ do { \
+ (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+ } while(0)
+
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
+
+/**
+ * \name Bitmasks for components of vectors.
+ *
+ * Used for write masks, negation masks, etc.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+
+typedef enum {
+ RC_ALURESULT_NONE = 0,
+ RC_ALURESULT_X,
+ RC_ALURESULT_W
+} rc_write_aluresult;
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
new file mode 100644
index 0000000000..ee839596aa
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2008-2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+
+/**
+ * Return the source slot where we installed the given register access,
+ * or -1 if no slot was free anymore.
+ */
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+ unsigned int rgb, unsigned int alpha,
+ rc_register_file file, unsigned int index)
+{
+ int candidate = -1;
+ int candidate_quality = -1;
+ int i;
+
+ if ((!rgb && !alpha) || file == RC_FILE_NONE)
+ return 0;
+
+ for(i = 0; i < 3; ++i) {
+ int q = 0;
+ if (rgb) {
+ if (pair->RGB.Src[i].Used) {
+ if (pair->RGB.Src[i].File != file ||
+ pair->RGB.Src[i].Index != index)
+ continue;
+ q++;
+ }
+ }
+ if (alpha) {
+ if (pair->Alpha.Src[i].Used) {
+ if (pair->Alpha.Src[i].File != file ||
+ pair->Alpha.Src[i].Index != index)
+ continue;
+ q++;
+ }
+ }
+ if (q > candidate_quality) {
+ candidate_quality = q;
+ candidate = i;
+ }
+ }
+
+ if (candidate >= 0) {
+ if (rgb) {
+ pair->RGB.Src[candidate].Used = 1;
+ pair->RGB.Src[candidate].File = file;
+ pair->RGB.Src[candidate].Index = index;
+ }
+ if (alpha) {
+ pair->Alpha.Src[candidate].Used = 1;
+ pair->Alpha.Src[candidate].File = file;
+ pair->Alpha.Src[candidate].Index = index;
+ }
+ }
+
+ return candidate;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
new file mode 100644
index 0000000000..511cc707a3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_PAIR_H_
+#define __RADEON_PROGRAM_PAIR_H_
+
+#include "radeon_code.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_constants.h"
+
+struct r300_fragment_program_compiler;
+
+
+/**
+ * \file
+ * Represents a paired ALU instruction, as found in R300 and R500
+ * fragment programs.
+ *
+ * Note that this representation is taking some liberties as far
+ * as register files are concerned, to allow separate register
+ * allocation.
+ *
+ * Also note that there are some subtleties in that the semantics
+ * of certain opcodes are implicitly changed in this representation;
+ * see \ref rc_pair_translate
+ */
+
+
+struct radeon_pair_instruction_source {
+ unsigned int Used:1;
+ unsigned int File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct radeon_pair_instruction_rgb {
+ unsigned int Opcode:8;
+ unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int WriteMask:3;
+ unsigned int Target:2;
+ unsigned int OutputWriteMask:3;
+ unsigned int Saturate:1;
+
+ struct radeon_pair_instruction_source Src[3];
+
+ struct {
+ unsigned int Source:2;
+ unsigned int Swizzle:9;
+ unsigned int Abs:1;
+ unsigned int Negate:1;
+ } Arg[3];
+};
+
+struct radeon_pair_instruction_alpha {
+ unsigned int Opcode:8;
+ unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int WriteMask:1;
+ unsigned int Target:2;
+ unsigned int OutputWriteMask:1;
+ unsigned int DepthWriteMask:1;
+ unsigned int Saturate:1;
+
+ struct radeon_pair_instruction_source Src[3];
+
+ struct {
+ unsigned int Source:2;
+ unsigned int Swizzle:3;
+ unsigned int Abs:1;
+ unsigned int Negate:1;
+ } Arg[3];
+};
+
+struct rc_pair_instruction {
+ struct radeon_pair_instruction_rgb RGB;
+ struct radeon_pair_instruction_alpha Alpha;
+
+ unsigned int WriteALUResult:2;
+ unsigned int ALUResultCompare:3;
+};
+
+
+/**
+ * General helper functions for dealing with the paired instruction format.
+ */
+/*@{*/
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+ unsigned int rgb, unsigned int alpha,
+ rc_register_file file, unsigned int index);
+/*@}*/
+
+
+/**
+ * Compiler passes that operate with the paired format.
+ */
+/*@{*/
+struct radeon_pair_handler;
+
+void rc_pair_translate(struct r300_fragment_program_compiler *c);
+void rc_pair_schedule(struct r300_fragment_program_compiler *c);
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps);
+/*@}*/
+
+#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
new file mode 100644
index 0000000000..28fb9eae92
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+ switch(target) {
+ case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+ case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+ case RC_TEXTURE_CUBE: return "CUBE";
+ case RC_TEXTURE_3D: return "3D";
+ case RC_TEXTURE_RECT: return "RECT";
+ case RC_TEXTURE_2D: return "2D";
+ case RC_TEXTURE_1D: return "1D";
+ default: return "BAD_TEXTURE_TARGET";
+ }
+}
+
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+ if (func == RC_COMPARE_FUNC_NEVER) {
+ fprintf(f, "false");
+ } else if (func == RC_COMPARE_FUNC_ALWAYS) {
+ fprintf(f, "true");
+ } else {
+ const char * op;
+ switch(func) {
+ case RC_COMPARE_FUNC_LESS: op = "<"; break;
+ case RC_COMPARE_FUNC_EQUAL: op = "=="; break;
+ case RC_COMPARE_FUNC_LEQUAL: op = "<="; break;
+ case RC_COMPARE_FUNC_GREATER: op = ">"; break;
+ case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
+ case RC_COMPARE_FUNC_GEQUAL: op = ">="; break;
+ default: op = "???"; break;
+ }
+ fprintf(f, "%s %s %s", lhs, op, rhs);
+ }
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+ if (file == RC_FILE_NONE) {
+ fprintf(f, "none");
+ } else if (file == RC_FILE_SPECIAL) {
+ switch(index) {
+ case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
+ default: fprintf(f, "special[%i]", index); break;
+ }
+ } else {
+ const char * filename;
+ switch(file) {
+ case RC_FILE_TEMPORARY: filename = "temp"; break;
+ case RC_FILE_INPUT: filename = "input"; break;
+ case RC_FILE_OUTPUT: filename = "output"; break;
+ case RC_FILE_ADDRESS: filename = "addr"; break;
+ case RC_FILE_CONSTANT: filename = "const"; break;
+ default: filename = "BAD FILE"; break;
+ }
+ fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
+ }
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+ if (mask & RC_MASK_X) fprintf(f, "x");
+ if (mask & RC_MASK_Y) fprintf(f, "y");
+ if (mask & RC_MASK_Z) fprintf(f, "z");
+ if (mask & RC_MASK_W) fprintf(f, "w");
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+ rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
+ if (dst.WriteMask != RC_MASK_XYZW) {
+ fprintf(f, ".");
+ rc_print_mask(f, dst.WriteMask);
+ }
+}
+
+static char rc_swizzle_char(unsigned int swz)
+{
+ switch(swz) {
+ case RC_SWIZZLE_X: return 'x';
+ case RC_SWIZZLE_Y: return 'y';
+ case RC_SWIZZLE_Z: return 'z';
+ case RC_SWIZZLE_W: return 'w';
+ case RC_SWIZZLE_ZERO: return '0';
+ case RC_SWIZZLE_ONE: return '1';
+ case RC_SWIZZLE_HALF: return 'H';
+ case RC_SWIZZLE_UNUSED: return '_';
+ }
+ return '?';
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+ unsigned int comp;
+ for(comp = 0; comp < 4; ++comp) {
+ rc_swizzle swz = GET_SWZ(swizzle, comp);
+ if (GET_BIT(negate, comp))
+ fprintf(f, "-");
+ fprintf(f, "%c", rc_swizzle_char(swz));
+ }
+}
+
+static void rc_print_src_register(FILE * f, struct rc_src_register src)
+{
+ int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
+
+ if (src.Negate == RC_MASK_XYZW)
+ fprintf(f, "-");
+ if (src.Abs)
+ fprintf(f, "|");
+
+ rc_print_register(f, src.File, src.Index, src.RelAddr);
+
+ if (src.Abs && !trivial_negate)
+ fprintf(f, "|");
+
+ if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
+ fprintf(f, ".");
+ rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
+ }
+
+ if (src.Abs && trivial_negate)
+ fprintf(f, "|");
+}
+
+static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int reg;
+
+ fprintf(f, "%s", opcode->Name);
+
+ switch(inst->U.I.SaturateMode) {
+ case RC_SATURATE_NONE: break;
+ case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
+ case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
+ default: fprintf(f, "_BAD_SAT"); break;
+ }
+
+ if (opcode->HasDstReg) {
+ fprintf(f, " ");
+ rc_print_dst_register(f, inst->U.I.DstReg);
+ if (opcode->NumSrcRegs)
+ fprintf(f, ",");
+ }
+
+ for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+ if (reg > 0)
+ fprintf(f, ",");
+ fprintf(f, " ");
+ rc_print_src_register(f, inst->U.I.SrcReg[reg]);
+ }
+
+ if (opcode->HasTexture) {
+ fprintf(f, ", %s%s[%u]",
+ textarget_to_string(inst->U.I.TexSrcTarget),
+ inst->U.I.TexShadow ? "SHADOW" : "",
+ inst->U.I.TexSrcUnit);
+ }
+
+ fprintf(f, ";");
+
+ if (inst->U.I.WriteALUResult) {
+ fprintf(f, " [aluresult = (");
+ rc_print_comparefunc(f,
+ (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
+ inst->U.I.ALUResultCompare, "0");
+ fprintf(f, ")]");
+ }
+
+ fprintf(f, "\n");
+}
+
+static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ int printedsrc = 0;
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used) {
+ if (printedsrc)
+ fprintf(f, ", ");
+ fprintf(f, "src%i.xyz = ", src);
+ rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
+ printedsrc = 1;
+ }
+ if (inst->Alpha.Src[src].Used) {
+ if (printedsrc)
+ fprintf(f, ", ");
+ fprintf(f, "src%i.w = ", src);
+ rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
+ printedsrc = 1;
+ }
+ }
+ fprintf(f, "\n");
+
+ if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+ fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
+ if (inst->RGB.WriteMask)
+ fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
+ (inst->RGB.WriteMask & 1) ? "x" : "",
+ (inst->RGB.WriteMask & 2) ? "y" : "",
+ (inst->RGB.WriteMask & 4) ? "z" : "");
+ if (inst->RGB.OutputWriteMask)
+ fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
+ (inst->RGB.OutputWriteMask & 1) ? "x" : "",
+ (inst->RGB.OutputWriteMask & 2) ? "y" : "",
+ (inst->RGB.OutputWriteMask & 4) ? "z" : "");
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ fprintf(f, " aluresult");
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
+ const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
+ fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source,
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
+ abs);
+ }
+ fprintf(f, "\n");
+ }
+
+ if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+ fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
+ if (inst->Alpha.WriteMask)
+ fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
+ if (inst->Alpha.OutputWriteMask)
+ fprintf(f, " color[%i].w", inst->Alpha.Target);
+ if (inst->Alpha.DepthWriteMask)
+ fprintf(f, " depth.w");
+ if (inst->WriteALUResult == RC_ALURESULT_W)
+ fprintf(f, " aluresult");
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
+ const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
+ fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source,
+ rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
+ }
+ fprintf(f, "\n");
+ }
+
+ if (inst->WriteALUResult) {
+ fprintf(f, " [aluresult = (");
+ rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
+ fprintf(f, ")]\n");
+ }
+}
+
+/**
+ * Print program to stderr, default options.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+ unsigned int linenum = 0;
+ struct rc_instruction *inst;
+
+ fprintf(stderr, "# Radeon Compiler Program\n");
+
+ for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+ fprintf(stderr, "%3d: ", linenum);
+
+ if (inst->Type == RC_INSTRUCTION_PAIR)
+ rc_print_pair_instruction(stderr, inst);
+ else
+ rc_print_normal_instruction(stderr, inst);
+
+ linenum++;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
new file mode 100644
index 0000000000..9c4b65f4c0
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_tex.h"
+
+/* Series of transformations to be done on textures. */
+
+static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
+ int tmu)
+{
+ struct rc_src_register reg = { 0, };
+
+ if (compiler->enable_shadow_ambient) {
+ reg.File = RC_FILE_CONSTANT;
+ reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
+ RC_STATE_SHADOW_AMBIENT, tmu);
+ reg.Swizzle = RC_SWIZZLE_WWWW;
+ } else {
+ reg.File = RC_FILE_NONE;
+ reg.Swizzle = RC_SWIZZLE_0000;
+ }
+ return reg;
+}
+
+static void lower_texture_rect(struct r300_fragment_program_compiler *compiler,
+ struct rc_instruction *inst)
+{
+ struct rc_instruction *inst_rect;
+ unsigned temp = rc_find_free_temporary(&compiler->Base);
+
+ if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) {
+ inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+
+ inst_rect->U.I.Opcode = RC_OPCODE_MUL;
+ inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rect->U.I.DstReg.Index = temp;
+ inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_rect->U.I.SrcReg[1].Index =
+ rc_constants_add_state(&compiler->Base.Program.Constants,
+ RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
+
+ inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+ }
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
+ * - implement texture compare (shadow extensions)
+ * - extract non-native source / destination operands
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
+ */
+int radeonTransformTEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
+{
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+
+ if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+ inst->U.I.Opcode != RC_OPCODE_TXB &&
+ inst->U.I.Opcode != RC_OPCODE_TXP &&
+ inst->U.I.Opcode != RC_OPCODE_KIL)
+ return 0;
+
+ /* ARB_shadow & EXT_shadow_funcs */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
+ (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+
+ if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+
+ if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+ } else {
+ inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+ }
+
+ return 1;
+ } else {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+ struct rc_instruction * inst_rcp = NULL;
+ struct rc_instruction * inst_mad;
+ struct rc_instruction * inst_cmp;
+ unsigned tmp_texsample = rc_find_free_temporary(c);
+ unsigned tmp_sum = rc_find_free_temporary(c);
+ unsigned tmp_recip_w = 0;
+ int pass, fail, tex;
+
+ /* Save the output register. */
+ struct rc_dst_register output_reg = inst->U.I.DstReg;
+
+ /* Redirect TEX to a new temp. */
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = tmp_texsample;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+ if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+ tmp_recip_w = rc_find_free_temporary(c);
+
+ /* Compute 1/W. */
+ inst_rcp = rc_insert_new_instruction(c, inst);
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = tmp_recip_w;
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+ }
+
+ /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
+ inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = tmp_sum;
+ inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
+ if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[1].Index = tmp_recip_w;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+ tex = 2;
+ } else {
+ inst_mad->U.I.Opcode = RC_OPCODE_ADD;
+ tex = 1;
+ }
+ inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[tex].Index = tmp_texsample;
+ inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle;
+
+ /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */
+ if (comparefunc == RC_COMPARE_FUNC_EQUAL) {
+ comparefunc = RC_COMPARE_FUNC_GEQUAL;
+ } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
+ comparefunc = RC_COMPARE_FUNC_LESS;
+ }
+
+ /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and:
+ * LESS: r < tex <=> -tex+r < 0
+ * GEQUAL: r >= tex <=> not (-tex+r < 0)
+ * GREATER: r > tex <=> tex-r < 0
+ * LEQUAL: r <= tex <=> not ( tex-r < 0)
+ *
+ * This negates either r or tex: */
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
+ inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW;
+ else
+ inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
+
+ /* This negates the whole expresion: */
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+ inst_cmp->U.I.DstReg = output_reg;
+ inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
+ inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
+ inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
+ inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+ }
+ }
+
+ /* Texture wrap modes don't work on NPOT textures or texrects.
+ *
+ * The game plan is simple. We have two flags, fake_npot and
+ * non_normalized_coords, as well as a tex target. The RECT tex target
+ * will make the emitted code use non-scaled texcoords.
+ *
+ * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
+ * mirroring are not. If we need to repeat, we do:
+ *
+ * MUL temp, texcoord, <scaling factor constant>
+ * FRC temp, temp ; Discard integer portion of coords
+ *
+ * This gives us coords in [0, 1].
+ *
+ * Mirroring is trickier. We're going to start out like repeat:
+ *
+ * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
+ * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
+ * ; so scale to [0, 1]
+ * FRC temp, temp ; Make the pattern repeat
+ * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
+ * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
+ * ; The pattern is backwards, so reverse it (1-x).
+ *
+ * This gives us coords in [0, 1].
+ *
+ * ~ C & M. ;)
+ */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) {
+ rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
+
+ /* R300 cannot sample from rectangles. */
+ if (!c->is_r500) {
+ lower_texture_rect(compiler, inst);
+ }
+
+ if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot &&
+ wrapmode != RC_WRAP_NONE) {
+ struct rc_instruction *inst_mov;
+ unsigned temp = rc_find_free_temporary(c);
+
+ /* For NPOT fallback, we need normalized coordinates anyway. */
+ if (c->is_r500) {
+ lower_texture_rect(compiler, inst);
+ }
+
+ if (wrapmode == RC_WRAP_REPEAT) {
+ /* Both instructions will be paired up. */
+ struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+ inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.DstReg.Index = temp;
+ inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
+ /*
+ * Function:
+ * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
+ *
+ * Code:
+ * MUL temp, src0, 0.5
+ * FRC temp, temp
+ * MAD temp, temp, 2, -1
+ * ADD temp, 1, -abs(temp)
+ */
+
+ struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
+ unsigned two, two_swizzle;
+
+ inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = temp;
+ inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
+
+ inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+ inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.DstReg.Index = temp;
+ inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.SrcReg[0].Index = temp;
+ inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+ two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+ inst_mad = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = temp;
+ inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[0].Index = temp;
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[1].Index = two;
+ inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
+ inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
+
+ inst_add = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_add->U.I.Opcode = RC_OPCODE_ADD;
+ inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_add->U.I.DstReg.Index = temp;
+ inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+ inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_add->U.I.SrcReg[1].Index = temp;
+ inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+ inst_add->U.I.SrcReg[1].Abs = 1;
+ inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
+ } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
+ /*
+ * Mirrored clamp modes are bloody simple, we just use abs
+ * to mirror [0, 1] into [-1, 0]. This works for
+ * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
+ */
+ struct rc_instruction *inst_mov;
+
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mov->U.I.SrcReg[0].Abs = 1;
+ }
+
+ /* Preserve W for TXP/TXB. */
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
+ }
+ }
+
+ /* Cannot write texture to output registers (all chips) or with masks (non-r500) */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ (inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+ (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg = inst->U.I.DstReg;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ }
+
+ /* Cannot read texture coordinate from constants file */
+ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+ }
+
+ return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h
new file mode 100644
index 0000000000..a0105051ac
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_TEX_H_
+#define __RADEON_PROGRAM_TEX_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+int radeonTransformTEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
+
+#endif /* __RADEON_PROGRAM_TEX_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
new file mode 100644
index 0000000000..c81d5f7a5e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+struct rc_swizzle_split {
+ unsigned char NumPhases;
+ unsigned char Phase[4];
+};
+
+/**
+ * Describe the swizzling capability of target hardware.
+ */
+struct rc_swizzle_caps {
+ /**
+ * Check whether the given swizzle, absolute and negate combination
+ * can be implemented natively by the hardware for this opcode.
+ *
+ * \return 1 if the swizzle is native for the given opcode
+ */
+ int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+ /**
+ * Determine how to split access to the masked channels of the
+ * given source register to obtain ALU-native swizzles.
+ */
+ void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */