summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c35
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c50
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c24
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c35
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c106
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h113
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c365
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c150
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c126
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c267
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h92
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c87
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.c150
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h110
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h128
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_print.c214
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h57
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c1
24 files changed, 1486 insertions, 717 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 080c79898b..53fb7caa95 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -8,11 +8,15 @@ LIBNAME = r300compiler
C_SOURCES = \
radeon_code.c \
radeon_compiler.c \
- radeon_nqssadce.c \
radeon_program.c \
- radeon_opcodes.c \
+ radeon_program_print.c \
+ radeon_opcodes.c \
radeon_program_alu.c \
radeon_program_pair.c \
+ radeon_dataflow.c \
+ radeon_dataflow_annotate.c \
+ radeon_dataflow_dealias.c \
+ radeon_dataflow_swizzles.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index ded6966d08..cfa48a59e3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -36,7 +36,6 @@
#include <stdio.h>
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
#include "radeon_compiler.h"
#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
@@ -92,7 +91,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
* Check whether the given instruction supports the swizzle and negate
* combinations in the given source register.
*/
-int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg)
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
if (reg.Abs)
reg.Negate = RC_MASK_NONE;
@@ -134,15 +133,16 @@ int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg)
}
-/**
- * Generate MOV dst, src using only native swizzles.
- */
-void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src)
+static void r300_swizzle_split(
+ struct rc_src_register src, unsigned int mask,
+ struct rc_swizzle_split * split)
{
if (src.Abs)
src.Negate = RC_MASK_NONE;
- while(dst.WriteMask) {
+ split->NumPhases = 0;
+
+ while(mask) {
const struct swizzle_data *best_swizzle = 0;
unsigned int best_matchcount = 0;
unsigned int best_matchmask = 0;
@@ -153,7 +153,7 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, st
unsigned int matchcount = 0;
unsigned int matchmask = 0;
for(comp = 0; comp < 3; ++comp) {
- if (!GET_BIT(dst.WriteMask, comp))
+ if (!GET_BIT(mask, comp))
continue;
unsigned int swz = GET_SWZ(src.Swizzle, comp);
if (swz == RC_SWIZZLE_UNUSED)
@@ -172,23 +172,24 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, st
best_swizzle = sd;
best_matchcount = matchcount;
best_matchmask = matchmask;
- if (matchmask == (dst.WriteMask & RC_MASK_XYZ))
+ if (matchmask == (mask & RC_MASK_XYZ))
break;
}
}
- struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
- inst->I.Opcode = RC_OPCODE_MOV;
- inst->I.DstReg = dst;
- inst->I.DstReg.WriteMask &= (best_matchmask | RC_MASK_W);
- inst->I.SrcReg[0] = src;
- inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? RC_MASK_XYZW : RC_MASK_NONE;
- /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
+ if (mask & RC_MASK_W)
+ best_matchmask |= RC_MASK_W;
- dst.WriteMask &= ~inst->I.DstReg.WriteMask;
+ split->Phase[split->NumPhases++] = best_matchmask;
+ mask &= ~best_matchmask;
}
}
+struct rc_swizzle_caps r300_swizzle_caps = {
+ .IsNative = r300_swizzle_is_native,
+ .Split = r300_swizzle_split
+};
+
/**
* Translate an RGB (XYZ) swizzle into the hardware code for the given
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
index 728c2cd972..118476af13 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
@@ -28,12 +28,9 @@
#ifndef __R300_FRAGPROG_SWIZZLE_H_
#define __R300_FRAGPROG_SWIZZLE_H_
-#include "radeon_program.h"
+#include "radeon_swizzle.h"
-struct nqssadce_state;
-
-int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg);
-void r300FPBuildSwizzle(struct nqssadce_state*, struct rc_dst_register dst, struct rc_src_register src);
+extern struct rc_swizzle_caps r300_swizzle_caps;
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 0aa40c0587..bf9bea685a 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -24,18 +24,18 @@
#include <stdio.h>
-#include "radeon_nqssadce.h"
#include "radeon_program_alu.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
-static void nqssadce_init(struct nqssadce_state* s)
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_fragment_program_compiler * c = s->UserData;
- s->Outputs[c->OutputColor].Sourced = RC_MASK_XYZW;
- s->Outputs[c->OutputDepth].Sourced = RC_MASK_W;
+ struct r300_fragment_program_compiler * c = userdata;
+ callback(data, c->OutputColor, RC_MASK_XYZW);
+ callback(data, c->OutputDepth, RC_MASK_W);
}
static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
@@ -92,6 +92,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ &radeonTransformTrigScale, 0 }
};
radeonLocalTransform(&c->Base, 4, transformations);
+
+ c->Base.SwizzleCaps = &r500_swizzle_caps;
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_TEX, c },
@@ -99,33 +101,23 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ &radeonTransformTrigSimple, 0 }
};
radeonLocalTransform(&c->Base, 3, transformations);
+
+ c->Base.SwizzleCaps = &r300_swizzle_caps;
}
if (c->Base.Debug) {
fprintf(stderr, "Fragment Program: After native rewrite:\n");
- rc_print_program(&c->Base.Program);
+ rc_print_program(&c->Base.Program, 0);
fflush(stderr);
}
- if (c->is_r500) {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r500FPIsNativeSwizzle,
- .BuildSwizzle = &r500FPBuildSwizzle
- };
- radeonNqssaDce(&c->Base, &nqssadce, c);
- } else {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r300FPIsNativeSwizzle,
- .BuildSwizzle = &r300FPBuildSwizzle
- };
- radeonNqssaDce(&c->Base, &nqssadce, c);
- }
+ rc_dataflow_annotate(&c->Base, &dataflow_outputs_mark_use, c);
+ rc_dataflow_dealias(&c->Base);
+ rc_dataflow_swizzles(&c->Base);
if (c->Base.Debug) {
- fprintf(stderr, "Compiler: after NqSSA-DCE:\n");
- rc_print_program(&c->Base.Program);
+ fprintf(stderr, "Compiler: after dataflow passes:\n");
+ rc_print_program(&c->Base.Program, 0);
fflush(stderr);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 0efd2c91e6..c64648ff3b 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -26,9 +26,9 @@
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
-#include "radeon_program.h"
+#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
+#include "radeon_swizzle.h"
/*
@@ -545,18 +545,19 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
}
}
-static void nqssadceInit(struct nqssadce_state* s)
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_vertex_program_compiler * compiler = s->UserData;
+ struct r300_vertex_program_compiler * c = userdata;
int i;
for(i = 0; i < 32; ++i) {
- if (compiler->RequiredOutputs & (1 << i))
- s->Outputs[i].Sourced = RC_MASK_XYZW;
+ if (c->RequiredOutputs & (1 << i))
+ callback(data, i, RC_MASK_XYZW);
}
}
-static int swizzleIsNative(rc_opcode opcode, struct rc_src_register reg)
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
(void) opcode;
(void) reg;
@@ -565,9 +566,16 @@ static int swizzleIsNative(rc_opcode opcode, struct rc_src_register reg)
}
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+ .IsNative = &swizzle_is_native,
+ .Split = 0 /* should never be called */
+};
+
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
+ compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
addArtificialOutputs(compiler);
{
@@ -579,7 +587,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after native rewrite:\n");
- rc_print_program(&compiler->Base.Program);
+ rc_print_program(&compiler->Base.Program, 0);
fflush(stderr);
}
@@ -596,26 +604,22 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after source conflict resolve:\n");
- rc_print_program(&compiler->Base.Program);
+ rc_print_program(&compiler->Base.Program, 0);
fflush(stderr);
}
- {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadceInit,
- .IsNativeSwizzle = &swizzleIsNative,
- .BuildSwizzle = NULL
- };
- radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
+ rc_dataflow_annotate(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+ rc_dataflow_dealias(&compiler->Base);
+ rc_dataflow_swizzles(&compiler->Base);
- /* We need this step for reusing temporary registers */
- allocate_temporary_registers(compiler);
+ /* This invalidates dataflow annotations and should be replaced
+ * by a future generic register allocation pass. */
+ allocate_temporary_registers(compiler);
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after NQSSADCE:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stderr);
- }
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after dataflow:\n");
+ rc_print_program(&compiler->Base.Program, 0);
+ fflush(stderr);
}
translate_vertex_program(compiler);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 3e994ebd1b..971465e359 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -169,7 +169,7 @@ int r500_transform_TEX(
return 1;
}
-int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg)
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
unsigned int relevant;
int i;
@@ -227,36 +227,38 @@ int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg)
}
/**
- * Implement a MOV with a potentially non-native swizzle.
+ * Split source register access.
*
* The only thing we *cannot* do in an ALU instruction is per-component
- * negation. Therefore, we split the MOV into two instructions when necessary.
+ * negation.
*/
-void r500FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src)
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+ struct rc_swizzle_split * split)
{
unsigned int negatebase[2] = { 0, 0 };
int i;
for(i = 0; i < 4; ++i) {
unsigned int swz = GET_SWZ(src.Swizzle, i);
- if (swz == RC_SWIZZLE_UNUSED)
+ if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
continue;
negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
}
+ split->NumPhases = 0;
+
for(i = 0; i <= 1; ++i) {
if (!negatebase[i])
continue;
- struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
- inst->I.Opcode = RC_OPCODE_MOV;
- inst->I.DstReg = dst;
- inst->I.DstReg.WriteMask = negatebase[i];
- inst->I.SrcReg[0] = src;
- inst->I.SrcReg[0].Negate = (i == 0) ? RC_MASK_NONE : RC_MASK_XYZW;
+ split->Phase[split->NumPhases++] = negatebase[i];
}
}
+struct rc_swizzle_caps r500_swizzle_caps = {
+ .IsNative = r500_swizzle_is_native,
+ .Split = r500_swizzle_split
+};
static char *toswiz(int swiz_val) {
switch(swiz_val) {
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 887d4abbd2..92ac75d5fd 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -34,15 +34,13 @@
#define __R500_FRAGPROG_H_
#include "radeon_compiler.h"
-#include "radeon_nqssadce.h"
+#include "radeon_swizzle.h"
extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
-extern int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg);
-
-extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src);
+extern struct rc_swizzle_caps r500_swizzle_caps;
extern int r500_transform_TEX(
struct radeon_compiler * c,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index babdcffd3a..d0b78ec1c8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -93,6 +93,41 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...)
}
}
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+ rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
+ return 1;
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+ struct rc_instruction *inst;
+
+ c->Program.InputsRead = 0;
+ c->Program.OutputsWritten = 0;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+ {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+ int i;
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->I.SrcReg[i].File == RC_FILE_INPUT)
+ c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->I.DstReg.File == RC_FILE_OUTPUT)
+ c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
+ }
+ }
+}
+
/**
* Rewrite the program such that everything that source the given input
* register will source new_input instead.
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 018f9bba06..87a732cd90 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -27,6 +27,7 @@
#include "radeon_code.h"
#include "radeon_program.h"
+struct rc_swizzle_caps;
struct radeon_compiler {
struct memory_pool Pool;
@@ -34,6 +35,14 @@ struct radeon_compiler {
unsigned Debug:1;
unsigned Error:1;
char * ErrorMsg;
+
+ /**
+ * Variables used internally, not be touched by callers
+ * of the compiler
+ */
+ /*@{*/
+ struct rc_swizzle_caps * SwizzleCaps;
+ /*@}*/
};
void rc_init(struct radeon_compiler * c);
@@ -42,6 +51,23 @@ void rc_destroy(struct radeon_compiler * c);
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged and the if-clause is run.
+ *
+ * A typical use-case would be:
+ *
+ * if (rc_assert(c, condition-that-must-be-true))
+ * return;
+ */
+#define rc_assert(c, cond) \
+ (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
+
void rc_calculate_inputs_outputs(struct radeon_compiler * c);
void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
new file mode 100644
index 0000000000..af6777a7bd
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+static void add_ref_to_vector(struct rc_dataflow_ref * ref, struct rc_dataflow_vector * vector)
+{
+ ref->Vector = vector;
+ ref->Prev = &vector->Refs;
+ ref->Next = vector->Refs.Next;
+ ref->Prev->Next = ref;
+ ref->Next->Prev = ref;
+}
+
+struct rc_dataflow_ref * rc_dataflow_create_ref(struct radeon_compiler * c,
+ struct rc_dataflow_vector * vector, struct rc_instruction * inst)
+{
+ struct rc_dataflow_ref * ref = memory_pool_malloc(&c->Pool, sizeof(struct rc_dataflow_ref));
+ ref->ReadInstruction = inst;
+ ref->UseMask = 0;
+
+ add_ref_to_vector(ref, vector);
+
+ return ref;
+}
+
+struct rc_dataflow_vector * rc_dataflow_create_vector(struct radeon_compiler * c,
+ rc_register_file file, unsigned int index, struct rc_instruction * inst)
+{
+ struct rc_dataflow_vector * vec = memory_pool_malloc(&c->Pool, sizeof(struct rc_dataflow_vector));
+
+ memset(vec, 0, sizeof(struct rc_dataflow_vector));
+ vec->File = file;
+ vec->Index = index;
+ vec->WriteInstruction = inst;
+
+ vec->Refs.Next = vec->Refs.Prev = &vec->Refs;
+
+ return vec;
+}
+
+void rc_dataflow_remove_ref(struct rc_dataflow_ref * ref)
+{
+ ref->Prev->Next = ref->Next;
+ ref->Next->Prev = ref->Prev;
+}
+
+void rc_dataflow_remove_instruction(struct rc_instruction * inst)
+{
+ for(unsigned int i = 0; i < 3; ++i) {
+ if (inst->Dataflow.SrcReg[i]) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcReg[i]);
+ inst->Dataflow.SrcReg[i] = 0;
+ }
+ if (inst->Dataflow.SrcRegAddress[i]) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[i]);
+ inst->Dataflow.SrcRegAddress[i] = 0;
+ }
+ }
+
+ if (inst->Dataflow.DstReg) {
+ while(inst->Dataflow.DstReg->Refs.Next != &inst->Dataflow.DstReg->Refs) {
+ struct rc_dataflow_ref * ref = inst->Dataflow.DstReg->Refs.Next;
+ rc_dataflow_remove_ref(ref);
+ if (inst->Dataflow.DstRegPrev)
+ add_ref_to_vector(ref, inst->Dataflow.DstRegPrev->Vector);
+ }
+
+ inst->Dataflow.DstReg->WriteInstruction = 0;
+ inst->Dataflow.DstReg = 0;
+ }
+
+ if (inst->Dataflow.DstRegPrev) {
+ rc_dataflow_remove_ref(inst->Dataflow.DstRegPrev);
+ inst->Dataflow.DstRegPrev = 0;
+ }
+
+ inst->Dataflow.DstRegAliased = 0;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
new file mode 100644
index 0000000000..c9856affe8
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+
+struct rc_dataflow_vector;
+
+struct rc_dataflow_ref {
+ struct rc_dataflow_vector * Vector;
+
+ /**
+ * Linked list of references to the above-mentioned vector.
+ * The linked list is \em not sorted.
+ */
+ /*@{*/
+ struct rc_dataflow_ref * Prev;
+ struct rc_dataflow_ref * Next;
+ /*@}*/
+
+ unsigned int UseMask:4;
+ struct rc_instruction * ReadInstruction;
+};
+
+struct rc_dataflow_vector {
+ rc_register_file File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+
+ /** For private use in compiler passes. MUST BE RESET TO 0 by the end of each pass.
+ * The annotate pass uses this bit to track whether a vector is in the
+ * update stack.
+ */
+ unsigned int PassBit:1;
+ /** Which of the components have been written with useful values */
+ unsigned int ValidMask:4;
+ /** Which of the components are used downstream */
+ unsigned int UseMask:4;
+ /** The instruction that produced this vector */
+ struct rc_instruction * WriteInstruction;
+
+ /** Linked list of references to this vector */
+ struct rc_dataflow_ref Refs;
+};
+
+struct rc_instruction_dataflow {
+ struct rc_dataflow_ref * SrcReg[3];
+ struct rc_dataflow_ref * SrcRegAddress[3];
+
+ /** Reference the components of the destination register
+ * that are carried over without being overwritten */
+ struct rc_dataflow_ref * DstRegPrev;
+ /** Indicates whether the destination register was in use
+ * before this instruction */
+ unsigned int DstRegAliased:1;
+ struct rc_dataflow_vector * DstReg;
+};
+
+/**
+ * General functions for manipulating the dataflow structures.
+ */
+/*@{*/
+struct rc_dataflow_ref * rc_dataflow_create_ref(struct radeon_compiler * c,
+ struct rc_dataflow_vector * vector, struct rc_instruction * inst);
+struct rc_dataflow_vector * rc_dataflow_create_vector(struct radeon_compiler * c,
+ rc_register_file file, unsigned int index, struct rc_instruction * inst);
+void rc_dataflow_remove_ref(struct rc_dataflow_ref * ref);
+
+void rc_dataflow_remove_instruction(struct rc_instruction * inst);
+/*@}*/
+
+
+/**
+ * Compiler passes based on dataflow structures.
+ */
+/*@{*/
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+ void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata);
+void rc_dataflow_dealias(struct radeon_compiler * c);
+void rc_dataflow_swizzles(struct radeon_compiler * c);
+/*@}*/
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c
new file mode 100644
index 0000000000..41d175a22f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+struct dataflow_state {
+ struct radeon_compiler * C;
+ unsigned int DCE:1;
+ unsigned int UpdateRunning:1;
+
+ struct rc_dataflow_vector * Input[RC_REGISTER_MAX_INDEX];
+ struct rc_dataflow_vector * Output[RC_REGISTER_MAX_INDEX];
+ struct rc_dataflow_vector * Temporary[RC_REGISTER_MAX_INDEX];
+ struct rc_dataflow_vector * Address;
+
+ struct rc_dataflow_vector ** UpdateStack;
+ unsigned int UpdateStackSize;
+ unsigned int UpdateStackReserved;
+};
+
+static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask);
+
+static struct rc_dataflow_vector * get_register_contents(struct dataflow_state * s,
+ rc_register_file file, unsigned int index)
+{
+ if (file == RC_FILE_INPUT || file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
+ if (index >= RC_REGISTER_MAX_INDEX)
+ return 0; /* cannot happen, but be defensive */
+
+ if (file == RC_FILE_TEMPORARY)
+ return s->Temporary[index];
+ if (file == RC_FILE_INPUT)
+ return s->Input[index];
+ if (file == RC_FILE_OUTPUT)
+ return s->Output[index];
+ }
+
+ if (file == RC_FILE_ADDRESS)
+ return s->Address;
+
+ return 0; /* can happen, constant register file */
+}
+
+static void mark_ref_use(struct dataflow_state * s, struct rc_dataflow_ref * ref, unsigned int mask)
+{
+ if (!(mask & ~ref->UseMask))
+ return;
+
+ ref->UseMask |= mask;
+ mark_vector_use(s, ref->Vector, ref->UseMask);
+}
+
+static void mark_source_use(struct dataflow_state * s, struct rc_instruction * inst,
+ unsigned int src, unsigned int srcmask)
+{
+ unsigned int refmask = 0;
+
+ for(unsigned int i = 0; i < 4; ++i) {
+ if (GET_BIT(srcmask, i))
+ refmask |= 1 << GET_SWZ(inst->I.SrcReg[src].Swizzle, i);
+ }
+
+ /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+ refmask &= RC_MASK_XYZW;
+
+ if (!refmask)
+ return; /* can happen if the swizzle contains constant components */
+
+ if (inst->Dataflow.SrcReg[src])
+ mark_ref_use(s, inst->Dataflow.SrcReg[src], refmask);
+
+ if (inst->Dataflow.SrcRegAddress[src])
+ mark_ref_use(s, inst->Dataflow.SrcRegAddress[src], RC_MASK_X);
+}
+
+static void compute_sources_for_writemask(
+ struct rc_instruction * inst,
+ unsigned int writemask,
+ unsigned int *srcmasks)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+
+ srcmasks[0] = 0;
+ srcmasks[1] = 0;
+ srcmasks[2] = 0;
+
+ if (inst->I.Opcode == RC_OPCODE_KIL)
+ srcmasks[0] |= RC_MASK_XYZW;
+
+ if (!writemask)
+ return;
+
+ if (opcode->IsComponentwise) {
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= writemask;
+ } else if (opcode->IsStandardScalar) {
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= RC_MASK_X;
+ } else {
+ switch(inst->I.Opcode) {
+ case RC_OPCODE_ARL:
+ srcmasks[0] |= RC_MASK_X;
+ break;
+ case RC_OPCODE_DP3:
+ srcmasks[0] |= RC_MASK_XYZ;
+ srcmasks[1] |= RC_MASK_XYZ;
+ break;
+ case RC_OPCODE_DP4:
+ srcmasks[0] |= RC_MASK_XYZW;
+ srcmasks[1] |= RC_MASK_XYZW;
+ break;
+ case RC_OPCODE_TEX:
+ case RC_OPCODE_TXB:
+ case RC_OPCODE_TXP:
+ srcmasks[0] |= RC_MASK_XYZW;
+ break;
+ case RC_OPCODE_DST:
+ srcmasks[0] |= 0x6;
+ srcmasks[1] |= 0xa;
+ break;
+ case RC_OPCODE_EXP:
+ case RC_OPCODE_LOG:
+ srcmasks[0] |= RC_MASK_XY;
+ break;
+ case RC_OPCODE_LIT:
+ srcmasks[0] |= 0xb;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void mark_instruction_source_use(struct dataflow_state * s,
+ struct rc_instruction * inst, unsigned int writemask)
+{
+ unsigned int srcmasks[3];
+
+ compute_sources_for_writemask(inst, writemask, srcmasks);
+
+ for(unsigned int src = 0; src < 3; ++src)
+ mark_source_use(s, inst, src, srcmasks[src]);
+}
+
+static void run_update(struct dataflow_state * s)
+{
+ s->UpdateRunning = 1;
+
+ while(s->UpdateStackSize) {
+ struct rc_dataflow_vector * vector = s->UpdateStack[--s->UpdateStackSize];
+ vector->PassBit = 0;
+
+ if (vector->WriteInstruction) {
+ struct rc_instruction * inst = vector->WriteInstruction;
+
+ if (inst->Dataflow.DstRegPrev) {
+ unsigned int carryover = vector->UseMask & ~inst->I.DstReg.WriteMask;
+
+ if (carryover)
+ mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover);
+ }
+
+ mark_instruction_source_use(
+ s, vector->WriteInstruction,
+ vector->UseMask & inst->I.DstReg.WriteMask);
+ }
+ }
+
+ s->UpdateRunning = 0;
+}
+
+static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask)
+{
+ if (!(mask & ~vector->UseMask))
+ return; /* no new used bits */
+
+ vector->UseMask |= mask;
+ if (vector->PassBit)
+ return;
+
+ if (s->UpdateStackSize >= s->UpdateStackReserved) {
+ unsigned int new_reserve = 2 * s->UpdateStackReserved;
+ struct rc_dataflow_vector ** new_stack;
+
+ if (!new_reserve)
+ new_reserve = 16;
+
+ new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct rc_dataflow_vector *));
+ memcpy(new_stack, s->UpdateStack, s->UpdateStackSize * sizeof(struct rc_dataflow_vector *));
+
+ s->UpdateStack = new_stack;
+ s->UpdateStackReserved = new_reserve;
+ }
+
+ s->UpdateStack[s->UpdateStackSize++] = vector;
+ vector->PassBit = 1;
+
+ if (!s->UpdateRunning)
+ run_update(s);
+}
+
+static void annotate_instruction(struct dataflow_state * s, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+ unsigned int src;
+
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ struct rc_dataflow_vector * vector = get_register_contents(s, inst->I.SrcReg[src].File, inst->I.SrcReg[src].Index);
+ if (vector) {
+ inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(s->C, vector, inst);
+ }
+ if (inst->I.SrcReg[src].RelAddr) {
+ struct rc_dataflow_vector * addr = get_register_contents(s, RC_FILE_ADDRESS, 0);
+ if (addr)
+ inst->Dataflow.SrcRegAddress[src] = rc_dataflow_create_ref(s->C, addr, inst);
+ }
+ }
+
+ mark_instruction_source_use(s, inst, 0); /* for KIL */
+
+ if (opcode->HasDstReg) {
+ struct rc_dataflow_vector * oldvec = get_register_contents(s, inst->I.DstReg.File, inst->I.DstReg.Index);
+ struct rc_dataflow_vector * newvec = rc_dataflow_create_vector(s->C, inst->I.DstReg.File, inst->I.DstReg.Index, inst);
+
+ newvec->ValidMask = inst->I.DstReg.WriteMask;
+
+ if (oldvec) {
+ unsigned int carryover = oldvec->ValidMask & ~inst->I.DstReg.WriteMask;
+
+ if (oldvec->ValidMask)
+ inst->Dataflow.DstRegAliased = 1;
+
+ if (carryover) {
+ inst->Dataflow.DstRegPrev = rc_dataflow_create_ref(s->C, oldvec, inst);
+ newvec->ValidMask |= carryover;
+
+ if (!s->DCE)
+ mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover);
+ }
+ }
+
+ inst->Dataflow.DstReg = newvec;
+
+ if (newvec->File == RC_FILE_TEMPORARY)
+ s->Temporary[newvec->Index] = newvec;
+ else if (newvec->File == RC_FILE_OUTPUT)
+ s->Output[newvec->Index] = newvec;
+ else
+ s->Address = newvec;
+
+ if (!s->DCE)
+ mark_vector_use(s, newvec, inst->I.DstReg.WriteMask);
+ }
+}
+
+static void init_inputs(struct dataflow_state * s)
+{
+ unsigned int index;
+
+ for(index = 0; index < 32; ++index) {
+ if (s->C->Program.InputsRead & (1 << index)) {
+ s->Input[index] = rc_dataflow_create_vector(s->C, RC_FILE_INPUT, index, 0);
+ s->Input[index]->ValidMask = RC_MASK_XYZW;
+ }
+ }
+}
+
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+ struct dataflow_state * s = data;
+ struct rc_dataflow_vector * vec = s->Output[index];
+
+ if (vec)
+ mark_vector_use(s, vec, mask);
+}
+
+void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata)
+{
+ struct dataflow_state s;
+ struct rc_instruction * inst;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+ s.DCE = dce ? 1 : 0;
+
+ init_inputs(&s);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ annotate_instruction(&s, inst);
+ }
+
+ if (s.DCE) {
+ dce(userdata, &s, &mark_output_use);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+
+ if (opcode->HasDstReg) {
+ unsigned int redundant_writes = inst->I.DstReg.WriteMask & ~inst->Dataflow.DstReg->UseMask;
+
+ inst->I.DstReg.WriteMask &= ~redundant_writes;
+
+ if (!inst->I.DstReg.WriteMask) {
+ struct rc_instruction * todelete = inst;
+ inst = inst->Prev;
+ rc_remove_instruction(todelete);
+ continue;
+ }
+ }
+
+ unsigned int srcmasks[3];
+ compute_sources_for_writemask(inst, inst->I.DstReg.WriteMask, srcmasks);
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(srcmasks[src], chan))
+ SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ }
+
+ if (inst->Dataflow.SrcReg[src]) {
+ if (!inst->Dataflow.SrcReg[src]->UseMask) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcReg[src]);
+ inst->Dataflow.SrcReg[src] = 0;
+ }
+ }
+
+ if (inst->Dataflow.SrcRegAddress[src]) {
+ if (!inst->Dataflow.SrcRegAddress[src]->UseMask) {
+ rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[src]);
+ inst->Dataflow.SrcRegAddress[src] = 0;
+ }
+ }
+ }
+ }
+
+ rc_calculate_inputs_outputs(c);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c
new file mode 100644
index 0000000000..4596636970
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+#define DEALIAS_LIST_SIZE 128
+
+struct dealias_state {
+ struct radeon_compiler * C;
+
+ unsigned int OldIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int NewIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int DealiasFail:1;
+
+ struct rc_dataflow_vector * List[DEALIAS_LIST_SIZE];
+ unsigned int Length;
+};
+
+static void push_dealias_vector(struct dealias_state * s, struct rc_dataflow_vector * vec)
+{
+ if (s->Length >= DEALIAS_LIST_SIZE) {
+ rc_debug(s->C, "%s: list size exceeded\n", __FUNCTION__);
+ s->DealiasFail = 1;
+ return;
+ }
+
+ if (rc_assert(s->C, vec->File == RC_FILE_TEMPORARY && vec->Index == s->OldIndex))
+ return;
+
+ s->List[s->Length++] = vec;
+}
+
+static void run_dealias(struct dealias_state * s)
+{
+ unsigned int i;
+
+ for(i = 0; i < s->Length && !s->DealiasFail; ++i) {
+ struct rc_dataflow_vector * vec = s->List[i];
+ struct rc_dataflow_ref * ref;
+
+ for(ref = vec->Refs.Next; ref != &vec->Refs; ref = ref->Next) {
+ if (ref->ReadInstruction->Dataflow.DstRegPrev == ref)
+ push_dealias_vector(s, ref->ReadInstruction->Dataflow.DstReg);
+ }
+ }
+
+ if (s->DealiasFail)
+ return;
+
+ for(i = 0; i < s->Length; ++i) {
+ struct rc_dataflow_vector * vec = s->List[i];
+ struct rc_dataflow_ref * ref;
+
+ vec->Index = s->NewIndex;
+ vec->WriteInstruction->I.DstReg.Index = s->NewIndex;
+
+ for(ref = vec->Refs.Next; ref != &vec->Refs; ref = ref->Next) {
+ struct rc_instruction * inst = ref->ReadInstruction;
+ unsigned int i;
+
+ for(i = 0; i < 3; ++i) {
+ if (inst->Dataflow.SrcReg[i] == ref) {
+ if (rc_assert(s->C, inst->I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+ inst->I.SrcReg[i].Index == s->OldIndex))
+ return;
+
+ inst->I.SrcReg[i].Index = s->NewIndex;
+ }
+ }
+ }
+ }
+}
+
+/**
+ * Breaks register aliasing to reduce multiple assignments to a single register.
+ *
+ * This affects sequences like:
+ * MUL r0, ...;
+ * MAD r0, r1, r2, r0;
+ * In this example, a new register will be used for the destination of the
+ * second MAD.
+ *
+ * The purpose of this dealiasing is to make the resulting code more SSA-like
+ * and therefore make it easier to move instructions around.
+ * This is of crucial importance for R300 fragment programs, where de-aliasing
+ * can help to reduce texture indirections, but other targets can benefit from
+ * it as well.
+ *
+ * \note When compiling GLSL, there may be some benefit gained from breaking
+ * up vectors whose components are unrelated. This is not done yet and should
+ * be investigated at some point (of course, a matching pass to re-merge
+ * components would be required).
+ */
+void rc_dataflow_dealias(struct radeon_compiler * c)
+{
+ struct dealias_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+
+ struct rc_instruction * inst;
+ for(inst = c->Program.Instructions.Prev; inst != &c->Program.Instructions; inst = inst->Prev) {
+ if (!inst->Dataflow.DstRegAliased || inst->Dataflow.DstReg->File != RC_FILE_TEMPORARY)
+ continue;
+
+ if (inst->Dataflow.DstReg->UseMask & ~inst->I.DstReg.WriteMask)
+ continue;
+
+ s.OldIndex = inst->I.DstReg.Index;
+ s.NewIndex = rc_find_free_temporary(c);
+ s.DealiasFail = 0;
+ s.Length = 0;
+
+ inst->Dataflow.DstRegAliased = 0;
+ if (inst->Dataflow.DstRegPrev) {
+ rc_dataflow_remove_ref(inst->Dataflow.DstRegPrev);
+ inst->Dataflow.DstRegPrev = 0;
+ }
+
+ push_dealias_vector(&s, inst->Dataflow.DstReg);
+ run_dealias(&s);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644
index 0000000000..1aa91eff7c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+static void rewrite_source(struct radeon_compiler * c,
+ struct rc_instruction * inst, unsigned src)
+{
+ struct rc_swizzle_split split;
+ unsigned int tempreg = rc_find_free_temporary(c);
+ unsigned int usemask;
+ struct rc_dataflow_ref * oldref = inst->Dataflow.SrcReg[src];
+ struct rc_dataflow_vector * vector = 0;
+
+ usemask = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_SWZ(inst->I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+ usemask |= 1 << chan;
+ }
+
+ c->SwizzleCaps->Split(inst->I.SrcReg[src], usemask, &split);
+
+ for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
+ struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+ unsigned int phase_refmask;
+ unsigned int masked_negate;
+
+ mov->I.Opcode = RC_OPCODE_MOV;
+ mov->I.DstReg.File = RC_FILE_TEMPORARY;
+ mov->I.DstReg.Index = tempreg;
+ mov->I.DstReg.WriteMask = split.Phase[phase];
+ mov->I.SrcReg[0] = inst->I.SrcReg[src];
+
+ phase_refmask = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(split.Phase[phase], chan))
+ SET_SWZ(mov->I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ else
+ phase_refmask |= 1 << GET_SWZ(mov->I.SrcReg[0].Swizzle, chan);
+ }
+
+ phase_refmask &= RC_MASK_XYZW;
+
+ masked_negate = split.Phase[phase] & mov->I.SrcReg[0].Negate;
+ if (masked_negate == 0)
+ mov->I.SrcReg[0].Negate = 0;
+ else if (masked_negate == split.Phase[phase])
+ mov->I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+ if (oldref) {
+ mov->Dataflow.SrcReg[0] = rc_dataflow_create_ref(c, oldref->Vector, mov);
+ mov->Dataflow.SrcReg[0]->UseMask = phase_refmask;
+ }
+
+ mov->Dataflow.DstReg = rc_dataflow_create_vector(c, RC_FILE_TEMPORARY, tempreg, mov);
+ mov->Dataflow.DstReg->ValidMask = split.Phase[phase];
+
+ if (vector) {
+ mov->Dataflow.DstRegPrev = rc_dataflow_create_ref(c, vector, mov);
+ mov->Dataflow.DstRegPrev->UseMask = vector->ValidMask;
+ mov->Dataflow.DstReg->ValidMask |= vector->ValidMask;
+ mov->Dataflow.DstRegAliased = 1;
+ }
+
+ mov->Dataflow.DstReg->UseMask = mov->Dataflow.DstReg->ValidMask;
+ vector = mov->Dataflow.DstReg;
+ }
+
+ if (oldref)
+ rc_dataflow_remove_ref(oldref);
+ inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(c, vector, inst);
+ inst->Dataflow.SrcReg[src]->UseMask = usemask;
+
+ inst->I.SrcReg[src].File = RC_FILE_TEMPORARY;
+ inst->I.SrcReg[src].Index = tempreg;
+ inst->I.SrcReg[src].Swizzle = 0;
+ inst->I.SrcReg[src].Negate = RC_MASK_NONE;
+ inst->I.SrcReg[src].Abs = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ SET_SWZ(inst->I.SrcReg[src].Swizzle, chan,
+ GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+ }
+}
+
+void rc_dataflow_swizzles(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+ unsigned int src;
+
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (!c->SwizzleCaps->IsNative(inst->I.Opcode, inst->I.SrcReg[src]))
+ rewrite_source(c, inst, src);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
deleted file mode 100644
index 3e02ebee81..0000000000
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * @file
- *
- * "Not-quite SSA" and Dead-Code Elimination.
- */
-
-#include "radeon_nqssadce.h"
-
-#include "radeon_compiler.h"
-
-
-/**
- * Return the @ref register_state for the given register (or 0 for untracked
- * registers, i.e. constants).
- */
-static struct register_state *get_reg_state(struct nqssadce_state* s, rc_register_file file, unsigned int index)
-{
- if (index >= RC_REGISTER_MAX_INDEX)
- return 0;
-
- switch(file) {
- case RC_FILE_TEMPORARY: return &s->Temps[index];
- case RC_FILE_OUTPUT: return &s->Outputs[index];
- case RC_FILE_ADDRESS: return &s->Address;
- default: return 0;
- }
-}
-
-
-static void track_used_srcreg(struct nqssadce_state* s,
- int src, unsigned int sourced)
-{
- struct rc_sub_instruction * inst = &s->IP->I;
- int i;
- unsigned int deswz_source = 0;
-
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i)) {
- unsigned int swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
- deswz_source |= 1 << swz;
- } else {
- inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
- inst->SrcReg[src].Swizzle |= RC_SWIZZLE_UNUSED << (3*i);
- }
- }
-
- if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
- struct rc_dst_register dstreg = inst->DstReg;
- dstreg.File = RC_FILE_TEMPORARY;
- dstreg.Index = rc_find_free_temporary(s->Compiler);
- dstreg.WriteMask = sourced;
-
- s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
-
- inst->SrcReg[src].File = RC_FILE_TEMPORARY;
- inst->SrcReg[src].Index = dstreg.Index;
- inst->SrcReg[src].Swizzle = 0;
- inst->SrcReg[src].Negate = RC_MASK_NONE;
- inst->SrcReg[src].Abs = 0;
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i))
- inst->SrcReg[src].Swizzle |= i << (3*i);
- else
- inst->SrcReg[src].Swizzle |= RC_SWIZZLE_UNUSED << (3*i);
- }
- deswz_source = sourced;
- }
-
- struct register_state *regstate;
-
- if (inst->SrcReg[src].RelAddr) {
- regstate = get_reg_state(s, RC_FILE_ADDRESS, 0);
- if (regstate)
- regstate->Sourced |= RC_MASK_X;
- } else {
- regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
- if (regstate)
- regstate->Sourced |= deswz_source & 0xf;
- }
-}
-
-static void unalias_srcregs(struct rc_instruction *inst, unsigned int oldindex, unsigned int newindex)
-{
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
- int i;
- for(i = 0; i < opcode->NumSrcRegs; ++i)
- if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY && inst->I.SrcReg[i].Index == oldindex)
- inst->I.SrcReg[i].Index = newindex;
-}
-
-static void unalias_temporary(struct nqssadce_state* s, unsigned int oldindex)
-{
- unsigned int newindex = rc_find_free_temporary(s->Compiler);
- struct rc_instruction * inst;
- for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) {
- if (inst->I.DstReg.File == RC_FILE_TEMPORARY && inst->I.DstReg.Index == oldindex)
- inst->I.DstReg.Index = newindex;
- unalias_srcregs(inst, oldindex, newindex);
- }
- unalias_srcregs(s->IP, oldindex, newindex);
-}
-
-
-/**
- * Handle one instruction.
- */
-static void process_instruction(struct nqssadce_state* s)
-{
- struct rc_sub_instruction *inst = &s->IP->I;
- unsigned int WriteMask;
-
- if (inst->Opcode != RC_OPCODE_KIL) {
- struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
- if (!regstate) {
- rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n",
- inst->DstReg.File, inst->DstReg.Index);
- return;
- }
-
- inst->DstReg.WriteMask &= regstate->Sourced;
- regstate->Sourced &= ~inst->DstReg.WriteMask;
-
- if (inst->DstReg.WriteMask == 0) {
- struct rc_instruction * inst_remove = s->IP;
- s->IP = s->IP->Prev;
- rc_remove_instruction(inst_remove);
- return;
- }
-
- if (inst->DstReg.File == RC_FILE_TEMPORARY && !regstate->Sourced)
- unalias_temporary(s, inst->DstReg.Index);
- }
-
- WriteMask = inst->DstReg.WriteMask;
-
- switch (inst->Opcode) {
- case RC_OPCODE_ARL:
- case RC_OPCODE_DDX:
- case RC_OPCODE_DDY:
- case RC_OPCODE_FRC:
- case RC_OPCODE_MOV:
- track_used_srcreg(s, 0, WriteMask);
- break;
- case RC_OPCODE_ADD:
- case RC_OPCODE_MAX:
- case RC_OPCODE_MIN:
- case RC_OPCODE_MUL:
- case RC_OPCODE_SGE:
- case RC_OPCODE_SLT:
- track_used_srcreg(s, 0, WriteMask);
- track_used_srcreg(s, 1, WriteMask);
- break;
- case RC_OPCODE_CMP:
- case RC_OPCODE_MAD:
- track_used_srcreg(s, 0, WriteMask);
- track_used_srcreg(s, 1, WriteMask);
- track_used_srcreg(s, 2, WriteMask);
- break;
- case RC_OPCODE_COS:
- case RC_OPCODE_EX2:
- case RC_OPCODE_LG2:
- case RC_OPCODE_RCP:
- case RC_OPCODE_RSQ:
- case RC_OPCODE_SIN:
- track_used_srcreg(s, 0, 0x1);
- break;
- case RC_OPCODE_DP3:
- track_used_srcreg(s, 0, 0x7);
- track_used_srcreg(s, 1, 0x7);
- break;
- case RC_OPCODE_DP4:
- track_used_srcreg(s, 0, 0xf);
- track_used_srcreg(s, 1, 0xf);
- break;
- case RC_OPCODE_KIL:
- case RC_OPCODE_TEX:
- case RC_OPCODE_TXB:
- case RC_OPCODE_TXP:
- track_used_srcreg(s, 0, 0xf);
- break;
- case RC_OPCODE_DST:
- track_used_srcreg(s, 0, 0x6);
- track_used_srcreg(s, 1, 0xa);
- break;
- case RC_OPCODE_EXP:
- case RC_OPCODE_LOG:
- case RC_OPCODE_POW:
- track_used_srcreg(s, 0, 0x3);
- break;
- case RC_OPCODE_LIT:
- track_used_srcreg(s, 0, 0xb);
- break;
- default:
- rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
- return;
- }
-
- s->IP = s->IP->Prev;
-}
-
-void rc_calculate_inputs_outputs(struct radeon_compiler * c)
-{
- struct rc_instruction *inst;
-
- c->Program.InputsRead = 0;
- c->Program.OutputsWritten = 0;
-
- for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
- {
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
- int i;
-
- for (i = 0; i < opcode->NumSrcRegs; ++i) {
- if (inst->I.SrcReg[i].File == RC_FILE_INPUT)
- c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
- }
-
- if (opcode->HasDstReg) {
- if (inst->I.DstReg.File == RC_FILE_OUTPUT)
- c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
- }
- }
-}
-
-void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data)
-{
- struct nqssadce_state s;
-
- memset(&s, 0, sizeof(s));
- s.Compiler = c;
- s.Descr = descr;
- s.UserData = data;
- s.Descr->Init(&s);
- s.IP = c->Program.Instructions.Prev;
-
- while(s.IP != &c->Program.Instructions && !c->Error)
- process_instruction(&s);
-
- rc_calculate_inputs_outputs(c);
-}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
deleted file mode 100644
index a2aa1eb8ca..0000000000
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __RADEON_PROGRAM_NQSSADCE_H_
-#define __RADEON_PROGRAM_NQSSADCE_H_
-
-#include "radeon_program.h"
-
-struct register_state {
- /**
- * Bitmask indicating which components of the register are sourced
- * by later instructions.
- */
- unsigned int Sourced : 4;
-};
-
-/**
- * Maintain state such as which registers are used, which registers are
- * read from, etc.
- */
-struct nqssadce_state {
- struct radeon_compiler *Compiler;
- struct radeon_nqssadce_descr *Descr;
-
- /**
- * All instructions after this instruction pointer have been dealt with.
- */
- struct rc_instruction * IP;
-
- /**
- * Which registers are read by subsequent instructions?
- */
- struct register_state Temps[RC_REGISTER_MAX_INDEX];
- struct register_state Outputs[RC_REGISTER_MAX_INDEX];
- struct register_state Address;
-
- void * UserData;
-};
-
-
-/**
- * This structure contains a description of the hardware in-so-far as
- * it is required for the NqSSA-DCE pass.
- */
-struct radeon_nqssadce_descr {
- /**
- * Fill in which outputs
- */
- void (*Init)(struct nqssadce_state *);
-
- /**
- * Check whether the given swizzle, absolute and negate combination
- * can be implemented natively by the hardware for this opcode.
- *
- * \return 1 if the swizzle is native for the given opcode
- */
- int (*IsNativeSwizzle)(rc_opcode opcode, struct rc_src_register reg);
-
- /**
- * Emit (at the current IP) the instruction MOV dst, src;
- * The transformation will work recursively on the emitted instruction(s).
- */
- void (*BuildSwizzle)(struct nqssadce_state*, struct rc_dst_register dst, struct rc_src_register src);
-};
-
-void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data);
-
-#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index ffe2de1a87..b7200990c2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -40,13 +40,15 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_ABS,
.Name = "ABS",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_ADD,
.Name = "ADD",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_ARL,
@@ -58,25 +60,29 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_CMP,
.Name = "CMP",
.NumSrcRegs = 3,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_COS,
.Name = "COS",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_DDX,
.Name = "DDX",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DDY,
.Name = "DDY",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DP3,
@@ -106,7 +112,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_EX2,
.Name = "EX2",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_EXP,
@@ -118,13 +125,15 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_FLR,
.Name = "FLR",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_FRC,
.Name = "FRC",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_KIL,
@@ -135,7 +144,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_LG2,
.Name = "LG2",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_LIT,
@@ -153,55 +163,64 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_LRP,
.Name = "LRP",
.NumSrcRegs = 3,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MAD,
.Name = "MAD",
.NumSrcRegs = 3,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MAX,
.Name = "MAX",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MIN,
.Name = "MIN",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MOV,
.Name = "MOV",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MUL,
.Name = "MUL",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_POW,
.Name = "POW",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_RCP,
.Name = "RCP",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_RSQ,
.Name = "RSQ",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_SCS,
@@ -213,61 +232,71 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.Opcode = RC_OPCODE_SEQ,
.Name = "SEQ",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SFL,
.Name = "SFL",
.NumSrcRegs = 0,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SGE,
.Name = "SGE",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SGT,
.Name = "SGT",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SIN,
.Name = "SIN",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_SLE,
.Name = "SLE",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SLT,
.Name = "SLT",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SNE,
.Name = "SNE",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SUB,
.Name = "SUB",
.NumSrcRegs = 2,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SWZ,
.Name = "SWZ",
.NumSrcRegs = 1,
- .HasDstReg = 1
+ .HasDstReg = 1,
+ .IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_XPD,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 4eb9be3e55..8e30bef1e3 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -187,6 +187,14 @@ struct rc_opcode_info {
unsigned int NumSrcRegs:2;
unsigned int HasDstReg:1;
+
+ /** true if this is a vector instruction that operates on components in parallel
+ * without any cross-component interaction */
+ unsigned int IsComponentwise:1;
+
+ /** true if this instruction sources only its operands X components
+ * to compute one result which is smeared across all output channels */
+ unsigned int IsStandardScalar:1;
};
extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index 0e0c1f68e6..b97c48084b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -154,155 +154,7 @@ struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, str
void rc_remove_instruction(struct rc_instruction * inst)
{
+ rc_dataflow_remove_instruction(inst);
inst->Prev->Next = inst->Next;
inst->Next->Prev = inst->Prev;
}
-
-static const char * textarget_to_string(rc_texture_target target)
-{
- switch(target) {
- case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
- case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
- case RC_TEXTURE_CUBE: return "CUBE";
- case RC_TEXTURE_3D: return "3D";
- case RC_TEXTURE_RECT: return "RECT";
- case RC_TEXTURE_2D: return "2D";
- case RC_TEXTURE_1D: return "1D";
- default: return "BAD_TEXTURE_TARGET";
- }
-}
-
-static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
-{
- if (file == RC_FILE_NONE) {
- fprintf(f, "none");
- } else {
- const char * filename;
- switch(file) {
- case RC_FILE_TEMPORARY: filename = "temp"; break;
- case RC_FILE_INPUT: filename = "input"; break;
- case RC_FILE_OUTPUT: filename = "output"; break;
- case RC_FILE_ADDRESS: filename = "addr"; break;
- case RC_FILE_CONSTANT: filename = "const"; break;
- default: filename = "BAD FILE"; break;
- }
- fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
- }
-}
-
-static void rc_print_mask(FILE * f, unsigned int mask)
-{
- if (mask & RC_MASK_X) fprintf(f, "x");
- if (mask & RC_MASK_Y) fprintf(f, "y");
- if (mask & RC_MASK_Z) fprintf(f, "z");
- if (mask & RC_MASK_W) fprintf(f, "w");
-}
-
-static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
-{
- rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
- if (dst.WriteMask != RC_MASK_XYZW) {
- fprintf(f, ".");
- rc_print_mask(f, dst.WriteMask);
- }
-}
-
-static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
-{
- unsigned int comp;
- for(comp = 0; comp < 4; ++comp) {
- rc_swizzle swz = GET_SWZ(swizzle, comp);
- if (GET_BIT(negate, comp))
- fprintf(f, "-");
- switch(swz) {
- case RC_SWIZZLE_X: fprintf(f, "x"); break;
- case RC_SWIZZLE_Y: fprintf(f, "y"); break;
- case RC_SWIZZLE_Z: fprintf(f, "z"); break;
- case RC_SWIZZLE_W: fprintf(f, "w"); break;
- case RC_SWIZZLE_ZERO: fprintf(f, "0"); break;
- case RC_SWIZZLE_ONE: fprintf(f, "1"); break;
- case RC_SWIZZLE_HALF: fprintf(f, "H"); break;
- case RC_SWIZZLE_UNUSED: fprintf(f, "_"); break;
- }
- }
-}
-
-static void rc_print_src_register(FILE * f, struct rc_src_register src)
-{
- int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
-
- if (src.Negate == RC_MASK_XYZW)
- fprintf(f, "-");
- if (src.Abs)
- fprintf(f, "|");
-
- rc_print_register(f, src.File, src.Index, src.RelAddr);
-
- if (src.Abs && !trivial_negate)
- fprintf(f, "|");
-
- if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
- fprintf(f, ".");
- rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
- }
-
- if (src.Abs && trivial_negate)
- fprintf(f, "|");
-}
-
-static void rc_print_instruction(FILE * f, struct rc_instruction * inst)
-{
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
- unsigned int reg;
-
- fprintf(f, "%s", opcode->Name);
-
- switch(inst->I.SaturateMode) {
- case RC_SATURATE_NONE: break;
- case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
- case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
- default: fprintf(f, "_BAD_SAT"); break;
- }
-
- if (opcode->HasDstReg) {
- fprintf(f, " ");
- rc_print_dst_register(f, inst->I.DstReg);
- if (opcode->NumSrcRegs)
- fprintf(f, ",");
- }
-
- for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
- if (reg > 0)
- fprintf(f, ",");
- fprintf(f, " ");
- rc_print_src_register(f, inst->I.SrcReg[reg]);
- }
-
- if (opcode->HasTexture) {
- fprintf(f, ", %s%s[%u]",
- textarget_to_string(inst->I.TexSrcTarget),
- inst->I.TexShadow ? "SHADOW" : "",
- inst->I.TexSrcUnit);
- }
-
- fprintf(f, ";\n");
-}
-
-/**
- * Print program to stderr, default options.
- */
-void rc_print_program(const struct rc_program *prog)
-{
- unsigned int linenum = 0;
- struct rc_instruction *inst;
-
- fprintf(stderr, "# Radeon Compiler Program\n");
-
- for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
- fprintf(stderr, "%3d: ", linenum);
-
- rc_print_instruction(stderr, inst);
-
- linenum++;
- }
-}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index a2ab757fec..d38c9a420c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -33,102 +33,11 @@
#include "radeon_opcodes.h"
#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_dataflow.h"
struct radeon_compiler;
-typedef enum {
- RC_SATURATE_NONE = 0,
- RC_SATURATE_ZERO_ONE,
- RC_SATURATE_MINUS_PLUS_ONE
-} rc_saturate_mode;
-
-typedef enum {
- RC_TEXTURE_2D_ARRAY,
- RC_TEXTURE_1D_ARRAY,
- RC_TEXTURE_CUBE,
- RC_TEXTURE_3D,
- RC_TEXTURE_RECT,
- RC_TEXTURE_2D,
- RC_TEXTURE_1D
-} rc_texture_target;
-
-typedef enum {
- /**
- * Used to indicate unused register descriptions and
- * source register that use a constant swizzle.
- */
- RC_FILE_NONE = 0,
- RC_FILE_TEMPORARY,
-
- /**
- * Input register.
- *
- * \note The compiler attaches no implicit semantics to input registers.
- * Fragment/vertex program specific semantics must be defined explicitly
- * using the appropriate compiler interfaces.
- */
- RC_FILE_INPUT,
-
- /**
- * Output register.
- *
- * \note The compiler attaches no implicit semantics to input registers.
- * Fragment/vertex program specific semantics must be defined explicitly
- * using the appropriate compiler interfaces.
- */
- RC_FILE_OUTPUT,
- RC_FILE_ADDRESS,
-
- /**
- * Indicates a constant from the \ref rc_constant_list .
- */
- RC_FILE_CONSTANT
-} rc_register_file;
-
-#define RC_REGISTER_INDEX_BITS 10
-#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
-
-typedef enum {
- RC_SWIZZLE_X = 0,
- RC_SWIZZLE_Y,
- RC_SWIZZLE_Z,
- RC_SWIZZLE_W,
- RC_SWIZZLE_ZERO,
- RC_SWIZZLE_ONE,
- RC_SWIZZLE_HALF,
- RC_SWIZZLE_UNUSED
-} rc_swizzle;
-
-#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
-#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
-#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
-#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
-
-#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
-#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
-#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
-#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
-#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
-#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
-#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
-
-/**
- * \name Bitmasks for components of vectors.
- *
- * Used for write masks, negation masks, etc.
- */
-/*@{*/
-#define RC_MASK_NONE 0
-#define RC_MASK_X 1
-#define RC_MASK_Y 2
-#define RC_MASK_Z 4
-#define RC_MASK_W 8
-#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
-#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
-#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
-#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
-/*@}*/
-
struct rc_src_register {
rc_register_file File:3;
@@ -198,6 +107,15 @@ struct rc_instruction {
struct rc_instruction * Next;
struct rc_sub_instruction I;
+
+ /**
+ * Dataflow annotations.
+ *
+ * These are not supplied by the caller of the compiler,
+ * but filled in during compilation stages that make use of
+ * dataflow analysis.
+ */
+ struct rc_instruction_dataflow Dataflow;
};
struct rc_program {
@@ -292,6 +210,10 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
void rc_remove_instruction(struct rc_instruction * inst);
-void rc_print_program(const struct rc_program *prog);
+enum {
+ RC_PRINT_DATAFLOW = 0x1
+};
+
+void rc_print_program(const struct rc_program *prog, unsigned int flags);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
new file mode 100644
index 0000000000..69994f9880
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+
+typedef enum {
+ RC_SATURATE_NONE = 0,
+ RC_SATURATE_ZERO_ONE,
+ RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+
+typedef enum {
+ RC_TEXTURE_2D_ARRAY,
+ RC_TEXTURE_1D_ARRAY,
+ RC_TEXTURE_CUBE,
+ RC_TEXTURE_3D,
+ RC_TEXTURE_RECT,
+ RC_TEXTURE_2D,
+ RC_TEXTURE_1D
+} rc_texture_target;
+
+typedef enum {
+ /**
+ * Used to indicate unused register descriptions and
+ * source register that use a constant swizzle.
+ */
+ RC_FILE_NONE = 0,
+ RC_FILE_TEMPORARY,
+
+ /**
+ * Input register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_INPUT,
+
+ /**
+ * Output register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_OUTPUT,
+ RC_FILE_ADDRESS,
+
+ /**
+ * Indicates a constant from the \ref rc_constant_list .
+ */
+ RC_FILE_CONSTANT
+} rc_register_file;
+
+#define RC_REGISTER_INDEX_BITS 10
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+
+typedef enum {
+ RC_SWIZZLE_X = 0,
+ RC_SWIZZLE_Y,
+ RC_SWIZZLE_Z,
+ RC_SWIZZLE_W,
+ RC_SWIZZLE_ZERO,
+ RC_SWIZZLE_ONE,
+ RC_SWIZZLE_HALF,
+ RC_SWIZZLE_UNUSED
+} rc_swizzle;
+
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
+#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
+#define SET_SWZ(swz, idx, newv) \
+ do { \
+ (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+ } while(0)
+
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+
+/**
+ * \name Bitmasks for components of vectors.
+ *
+ * Used for write masks, negation masks, etc.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
new file mode 100644
index 0000000000..38060ea3ad
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static void print_comment(FILE * f)
+{
+ fprintf(f, " # ");
+}
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+ switch(target) {
+ case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+ case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+ case RC_TEXTURE_CUBE: return "CUBE";
+ case RC_TEXTURE_3D: return "3D";
+ case RC_TEXTURE_RECT: return "RECT";
+ case RC_TEXTURE_2D: return "2D";
+ case RC_TEXTURE_1D: return "1D";
+ default: return "BAD_TEXTURE_TARGET";
+ }
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+ if (file == RC_FILE_NONE) {
+ fprintf(f, "none");
+ } else {
+ const char * filename;
+ switch(file) {
+ case RC_FILE_TEMPORARY: filename = "temp"; break;
+ case RC_FILE_INPUT: filename = "input"; break;
+ case RC_FILE_OUTPUT: filename = "output"; break;
+ case RC_FILE_ADDRESS: filename = "addr"; break;
+ case RC_FILE_CONSTANT: filename = "const"; break;
+ default: filename = "BAD FILE"; break;
+ }
+ fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
+ }
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+ if (mask & RC_MASK_X) fprintf(f, "x");
+ if (mask & RC_MASK_Y) fprintf(f, "y");
+ if (mask & RC_MASK_Z) fprintf(f, "z");
+ if (mask & RC_MASK_W) fprintf(f, "w");
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+ rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
+ if (dst.WriteMask != RC_MASK_XYZW) {
+ fprintf(f, ".");
+ rc_print_mask(f, dst.WriteMask);
+ }
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+ unsigned int comp;
+ for(comp = 0; comp < 4; ++comp) {
+ rc_swizzle swz = GET_SWZ(swizzle, comp);
+ if (GET_BIT(negate, comp))
+ fprintf(f, "-");
+ switch(swz) {
+ case RC_SWIZZLE_X: fprintf(f, "x"); break;
+ case RC_SWIZZLE_Y: fprintf(f, "y"); break;
+ case RC_SWIZZLE_Z: fprintf(f, "z"); break;
+ case RC_SWIZZLE_W: fprintf(f, "w"); break;
+ case RC_SWIZZLE_ZERO: fprintf(f, "0"); break;
+ case RC_SWIZZLE_ONE: fprintf(f, "1"); break;
+ case RC_SWIZZLE_HALF: fprintf(f, "H"); break;
+ case RC_SWIZZLE_UNUSED: fprintf(f, "_"); break;
+ }
+ }
+}
+
+static void rc_print_src_register(FILE * f, struct rc_src_register src)
+{
+ int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
+
+ if (src.Negate == RC_MASK_XYZW)
+ fprintf(f, "-");
+ if (src.Abs)
+ fprintf(f, "|");
+
+ rc_print_register(f, src.File, src.Index, src.RelAddr);
+
+ if (src.Abs && !trivial_negate)
+ fprintf(f, "|");
+
+ if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
+ fprintf(f, ".");
+ rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
+ }
+
+ if (src.Abs && trivial_negate)
+ fprintf(f, "|");
+}
+
+static void rc_print_ref(FILE * f, struct rc_dataflow_ref * ref)
+{
+ fprintf(f, "ref(%p", ref->Vector);
+
+ if (ref->UseMask != RC_MASK_XYZW) {
+ fprintf(f, ".");
+ rc_print_mask(f, ref->UseMask);
+ }
+
+ fprintf(f, ")");
+}
+
+static void rc_print_instruction(FILE * f, unsigned int flags, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+ unsigned int reg;
+
+ fprintf(f, "%s", opcode->Name);
+
+ switch(inst->I.SaturateMode) {
+ case RC_SATURATE_NONE: break;
+ case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
+ case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
+ default: fprintf(f, "_BAD_SAT"); break;
+ }
+
+ if (opcode->HasDstReg) {
+ fprintf(f, " ");
+ rc_print_dst_register(f, inst->I.DstReg);
+ if (opcode->NumSrcRegs)
+ fprintf(f, ",");
+ }
+
+ for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+ if (reg > 0)
+ fprintf(f, ",");
+ fprintf(f, " ");
+ rc_print_src_register(f, inst->I.SrcReg[reg]);
+ }
+
+ if (opcode->HasTexture) {
+ fprintf(f, ", %s%s[%u]",
+ textarget_to_string(inst->I.TexSrcTarget),
+ inst->I.TexShadow ? "SHADOW" : "",
+ inst->I.TexSrcUnit);
+ }
+
+ fprintf(f, ";\n");
+
+ if (flags & RC_PRINT_DATAFLOW) {
+ print_comment(f);
+
+ fprintf(f, "Dst = %p", inst->Dataflow.DstReg);
+ if (inst->Dataflow.DstRegAliased)
+ fprintf(f, " aliased");
+ if (inst->Dataflow.DstRegPrev) {
+ fprintf(f, " from ");
+ rc_print_ref(f, inst->Dataflow.DstRegPrev);
+ }
+
+ for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+ fprintf(f, ", ");
+ if (inst->Dataflow.SrcReg[reg])
+ rc_print_ref(f, inst->Dataflow.SrcReg[reg]);
+ else
+ fprintf(f, "<no ref>");
+ }
+
+ fprintf(f, "\n");
+ }
+}
+
+/**
+ * Print program to stderr, default options.
+ */
+void rc_print_program(const struct rc_program *prog, unsigned int flags)
+{
+ unsigned int linenum = 0;
+ struct rc_instruction *inst;
+
+ fprintf(stderr, "# Radeon Compiler Program%s\n",
+ flags & RC_PRINT_DATAFLOW ? " (with dataflow annotations)" : "");
+
+ for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+ fprintf(stderr, "%3d: ", linenum);
+
+ rc_print_instruction(stderr, flags, inst);
+
+ linenum++;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
new file mode 100644
index 0000000000..c81d5f7a5e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+struct rc_swizzle_split {
+ unsigned char NumPhases;
+ unsigned char Phase[4];
+};
+
+/**
+ * Describe the swizzling capability of target hardware.
+ */
+struct rc_swizzle_caps {
+ /**
+ * Check whether the given swizzle, absolute and negate combination
+ * can be implemented natively by the hardware for this opcode.
+ *
+ * \return 1 if the swizzle is native for the given opcode
+ */
+ int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+ /**
+ * Determine how to split access to the masked channels of the
+ * given source register to obtain ALU-native swizzles.
+ */
+ void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index be21268ba5..b7d5429dc5 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -41,7 +41,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "tnl/tnl.h"
#include "compiler/radeon_compiler.h"
-#include "compiler/radeon_nqssadce.h"
#include "radeon_mesa_to_rc.h"
#include "r300_context.h"
#include "r300_state.h"