summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile1
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c231
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c31
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c17
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c30
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c117
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c128
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h36
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c1
15 files changed, 578 insertions, 54 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 3167d49bca..d0eb170784 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -23,6 +23,7 @@ C_SOURCES = \
radeon_dataflow_deadcode.c \
radeon_dataflow_swizzles.c \
radeon_optimize.c \
+ radeon_remove_constants.c \
radeon_rename_regs.c \
r3xx_fragprog.c \
r300_fragprog.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
index c6f47a6f8a..847857b142 100755
--- a/src/mesa/drivers/dri/r300/compiler/SConscript
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -22,6 +22,7 @@ r300compiler = env.ConvenienceLibrary(
'radeon_pair_schedule.c',
'radeon_pair_regalloc.c',
'radeon_optimize.c',
+ 'radeon_remove_constants.c',
'radeon_rename_regs.c',
'radeon_emulate_branches.c',
'radeon_emulate_loops.c',
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index d2fa816894..8613ec5109 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -30,6 +30,7 @@
#include "radeon_program_alu.h"
#include "radeon_program_tex.h"
#include "radeon_rename_regs.h"
+#include "radeon_remove_constants.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
@@ -180,6 +181,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after dataflow passes");
+ if (c->Base.remove_unused_constants) {
+ rc_remove_unused_constants(&c->Base,
+ &c->code->constants_remap_table);
+
+ debug_program_log(c, "after constants cleanup");
+ }
+
if(!c->Base.is_r500) {
/* This pass makes it easier for the scheduler to group TEX
* instructions and reduces the chances of creating too
@@ -224,4 +232,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
r300FragmentProgramDump(c->code);
}
}
+
+ /* Check the number of constants. */
+ if (!c->Base.Error) {
+ unsigned max = c->Base.is_r500 ? R500_PFS_NUM_CONST_REGS : R300_PFS_NUM_CONST_REGS;
+
+ if (c->Base.Program.Constants.Count > max) {
+ rc_error(&c->Base, "Too many constants. Max: %i, Got: %i\n",
+ max, c->Base.Program.Constants.Count);
+ }
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 666c9c2a7a..b05b3aabf3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -31,6 +31,7 @@
#include "radeon_swizzle.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
+#include "radeon_remove_constants.h"
struct loop {
int BgnLoop;
@@ -465,7 +466,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
{
struct rc_instruction *rci;
- struct loop * loops;
+ struct loop * loops = NULL;
int current_loop_depth = 0;
int loops_reserved = 0;
@@ -484,6 +485,21 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
if (!valid_dst(compiler->code, &vpi->DstReg))
continue;
+ if (rc_get_opcode_info(vpi->Opcode)->HasDstReg) {
+ /* Relative addressing of destination operands is not supported yet. */
+ if (vpi->DstReg.RelAddr) {
+ rc_error(&compiler->Base, "Vertex program does not support relative "
+ "addressing of destination operands (yet).\n");
+ return;
+ }
+
+ /* Neither is Saturate. */
+ if (vpi->SaturateMode != RC_SATURATE_NONE) {
+ rc_error(&compiler->Base, "Vertex program does not support the Saturate "
+ "modifier (yet).\n");
+ }
+ }
+
if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS ||
(compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) {
rc_error(&compiler->Base, "Vertex program has too many instructions\n");
@@ -543,10 +559,16 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
}
case RC_OPCODE_ENDLOOP:
{
- struct loop * l = &loops[current_loop_depth - 1];
- unsigned int act_addr = l->BgnLoop - 1;
- unsigned int last_addr = (compiler->code->length / 4) - 1;
- unsigned int ret_addr = l->BgnLoop;
+ struct loop * l;
+ unsigned int act_addr;
+ unsigned int last_addr;
+ unsigned int ret_addr;
+
+ assert(loops);
+ l = &loops[current_loop_depth - 1];
+ act_addr = l->BgnLoop - 1;
+ last_addr = (compiler->code->length / 4) - 1;
+ ret_addr = l->BgnLoop;
if (loops_reserved >= R300_VS_MAX_FC_OPS) {
rc_error(&compiler->Base,
@@ -624,10 +646,9 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
struct temporary_allocation * ta;
unsigned int i, j;
- compiler->code->num_temporaries = 0;
memset(hwtemps, 0, sizeof(hwtemps));
- /* Pass 1: Count original temporaries and allocate structures */
+ /* Pass 1: Count original temporaries. */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -645,12 +666,30 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
}
}
}
+ compiler->code->num_temporaries = num_orig_temps;
+
+ /* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. */
+ for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasDstReg)
+ if (inst->U.I.DstReg.RelAddr)
+ return;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[i].RelAddr) {
+ return;
+ }
+ }
+ }
+
+ compiler->code->num_temporaries = 0;
ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
sizeof(struct temporary_allocation) * num_orig_temps);
memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
- /* Pass 2: Determine original temporary lifetimes */
+ /* Pass 3: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
/* Instructions inside of loops need to use the ENDLOOP
@@ -685,7 +724,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
}
}
- /* Pass 3: Register allocation */
+ /* Pass 4: Register allocation */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -853,6 +892,76 @@ static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
return 1;
}
+static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
+ struct rc_instruction *arl,
+ struct rc_instruction *end,
+ int min_offset)
+{
+ struct rc_instruction *inst, *add;
+ unsigned const_swizzle;
+
+ /* Transform ARL */
+ add = rc_insert_new_instruction(&c->Base, arl->Prev);
+ add->U.I.Opcode = RC_OPCODE_ADD;
+ add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
+ add->U.I.DstReg.WriteMask = RC_MASK_X;
+ add->U.I.SrcReg[0] = arl->U.I.SrcReg[0];
+ add->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
+ min_offset, &const_swizzle);
+ add->U.I.SrcReg[1].Swizzle = const_swizzle;
+
+ arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index;
+ arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;
+
+ /* Rewrite offsets up to and excluding inst. */
+ for (inst = arl->Next; inst != end; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++)
+ if (inst->U.I.SrcReg[i].RelAddr)
+ inst->U.I.SrcReg[i].Index -= min_offset;
+ }
+}
+
+static void rc_emulate_negative_addressing(struct r300_vertex_program_compiler *c)
+{
+ struct rc_instruction *inst, *lastARL = NULL;
+ int min_offset = 0;
+
+ for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (inst->U.I.Opcode == RC_OPCODE_ARL) {
+ if (lastARL != NULL && min_offset < 0)
+ transform_negative_addressing(c, lastARL, inst, min_offset);
+
+ lastARL = inst;
+ min_offset = 0;
+ continue;
+ }
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].RelAddr &&
+ inst->U.I.SrcReg[i].Index < 0) {
+ /* ARL must precede any indirect addressing. */
+ if (lastARL == NULL) {
+ rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL.");
+ return;
+ }
+
+ if (inst->U.I.SrcReg[i].Index < min_offset)
+ min_offset = inst->U.I.SrcReg[i].Index;
+ }
+ }
+ }
+
+ if (lastARL != NULL && min_offset < 0)
+ transform_negative_addressing(c, lastARL, inst, min_offset);
+}
+
static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
{
if (c->Base.Debug) {
@@ -868,44 +977,56 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
};
-void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{
struct emulate_loop_state loop_state;
- compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+ c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
- addArtificialOutputs(compiler);
+ addArtificialOutputs(c);
- debug_program_log(compiler, "before compilation");
+ debug_program_log(c, "before compilation");
- if (compiler->Base.is_r500)
- rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU);
+ if (c->Base.is_r500)
+ rc_transform_loops(&c->Base, &loop_state, R500_VS_MAX_ALU);
else
- rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU);
+ rc_transform_loops(&c->Base, &loop_state, R300_VS_MAX_ALU);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after emulate loops");
+ debug_program_log(c, "after emulate loops");
- if (!compiler->Base.is_r500) {
- rc_emulate_branches(&compiler->Base);
- debug_program_log(compiler, "after emulate branches");
+ if (!c->Base.is_r500) {
+ rc_emulate_branches(&c->Base);
+ if (c->Base.Error)
+ return;
+ debug_program_log(c, "after emulate branches");
}
- if (compiler->Base.is_r500) {
+ rc_emulate_negative_addressing(c);
+
+ debug_program_log(c, "after negative addressing emulation");
+
+ if (c->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
{ &r300_transform_trig_scale_vertex, 0 }
};
- radeonLocalTransform(&compiler->Base, 2, transformations);
+ radeonLocalTransform(&c->Base, 2, transformations);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after native rewrite");
+ debug_program_log(c, "after native rewrite");
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
{ &radeonTransformTrigSimple, 0 }
};
- radeonLocalTransform(&compiler->Base, 2, transformations);
+ radeonLocalTransform(&c->Base, 2, transformations);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after native rewrite");
+ debug_program_log(c, "after native rewrite");
/* Note: This pass has to be done seperately from ALU rewrite,
* because it needs to check every instruction.
@@ -913,9 +1034,11 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
struct radeon_program_transformation transformations2[] = {
{ &transform_nonnative_modifiers, 0 },
};
- radeonLocalTransform(&compiler->Base, 1, transformations2);
+ radeonLocalTransform(&c->Base, 1, transformations2);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after emulate modifiers");
+ debug_program_log(c, "after emulate modifiers");
}
{
@@ -926,30 +1049,58 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
struct radeon_program_transformation transformations[] = {
{ &transform_source_conflicts, 0 },
};
- radeonLocalTransform(&compiler->Base, 1, transformations);
+ radeonLocalTransform(&c->Base, 1, transformations);
+ if (c->Base.Error)
+ return;
}
- debug_program_log(compiler, "after source conflict resolve");
+ debug_program_log(c, "after source conflict resolve");
- rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+ rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_used, c);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after deadcode");
+ debug_program_log(c, "after deadcode");
- rc_dataflow_swizzles(&compiler->Base);
+ rc_dataflow_swizzles(&c->Base);
+ if (c->Base.Error)
+ return;
- allocate_temporary_registers(compiler);
+ debug_program_log(c, "after dataflow");
- debug_program_log(compiler, "after dataflow");
+ allocate_temporary_registers(c);
+ if (c->Base.Error)
+ return;
- translate_vertex_program(compiler);
+ debug_program_log(c, "after register allocation");
- rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+ if (c->Base.remove_unused_constants) {
+ rc_remove_unused_constants(&c->Base,
+ &c->code->constants_remap_table);
+ if (c->Base.Error)
+ return;
- compiler->code->InputsRead = compiler->Base.Program.InputsRead;
- compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+ debug_program_log(c, "after constants cleanup");
+ }
- if (compiler->Base.Debug) {
+ translate_vertex_program(c);
+ if (c->Base.Error)
+ return;
+
+ rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+
+ c->code->InputsRead = c->Base.Program.InputsRead;
+ c->code->OutputsWritten = c->Base.Program.OutputsWritten;
+
+ if (c->Base.Debug) {
fprintf(stderr, "Final vertex program code:\n");
- r300_vertex_program_dump(compiler);
+ r300_vertex_program_dump(c);
+ }
+
+ /* Check the number of constants. */
+ if (!c->Base.Error &&
+ c->Base.Program.Constants.Count > 256) {
+ rc_error(&c->Base, "Too many constants. Max: 256, Got: %i\n",
+ c->Base.Program.Constants.Count);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 896246d203..f76676fae8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -235,6 +235,7 @@ struct rX00_fragment_program_code {
unsigned writes_depth:1;
struct rc_constant_list constants;
+ unsigned *constants_remap_table;
};
@@ -266,6 +267,7 @@ struct r300_vertex_program_code {
int outputs[VSF_MAX_OUTPUTS];
struct rc_constant_list constants;
+ unsigned *constants_remap_table;
uint32_t InputsRead;
uint32_t OutputsWritten;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 7c42eb3ae5..5155b912e1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -39,9 +39,12 @@ struct radeon_compiler {
char * ErrorMsg;
/* Hardware specification. */
- unsigned is_r500;
+ unsigned is_r500:1;
unsigned max_temp_regs;
+ /* Whether to remove unused constants and empty holes in constant space. */
+ unsigned remove_unused_constants:1;
+
/**
* Variables used internally, not be touched by callers
* of the compiler
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index faf531b412..acdb371de9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -157,8 +157,12 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
if (pused) {
usedmask = *pused & inst->U.I.DstReg.WriteMask;
- *pused &= ~usedmask;
+ if (!inst->U.I.DstReg.RelAddr)
+ *pused &= ~usedmask;
}
+
+ if (inst->U.I.DstReg.RelAddr)
+ mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
insts->WriteMask |= usedmask;
@@ -213,6 +217,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
{
struct deadcode_state s;
unsigned int nr_instructions;
+ unsigned has_temp_reladdr_src = 0;
memset(&s, 0, sizeof(s));
s.C = c;
@@ -300,6 +305,30 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
}
}
+
+ if (!has_temp_reladdr_src) {
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[i].RelAddr) {
+ /* If there is a register read from a temporary file with relative addressing,
+ * mark all preceding written registers as used. */
+ for (struct rc_instruction *ptr = inst->Prev;
+ ptr != &c->Program.Instructions;
+ ptr = ptr->Prev) {
+ if (opcode->HasDstReg &&
+ ptr->U.I.DstReg.File == RC_FILE_TEMPORARY &&
+ ptr->U.I.DstReg.WriteMask) {
+ mark_used(&s,
+ ptr->U.I.DstReg.File,
+ ptr->U.I.DstReg.Index,
+ ptr->U.I.DstReg.WriteMask);
+ }
+ }
+
+ has_temp_reladdr_src = 1;
+ }
+ }
+ }
}
update_instruction(&s, inst);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index 2ea830be7f..da495a3afa 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -95,6 +95,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_DP2,
+ .Name = "DP2",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
.Opcode = RC_OPCODE_DP3,
.Name = "DP3",
.NumSrcRegs = 2,
@@ -295,6 +301,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_SSG,
+ .Name = "SSG",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_SUB,
.Name = "SUB",
.NumSrcRegs = 2,
@@ -435,6 +448,10 @@ void rc_compute_sources_for_writemask(
case RC_OPCODE_ARL:
srcmasks[0] |= RC_MASK_X;
break;
+ case RC_OPCODE_DP2:
+ srcmasks[0] |= RC_MASK_XY;
+ srcmasks[1] |= RC_MASK_XY;
+ break;
case RC_OPCODE_DP3:
srcmasks[0] |= RC_MASK_XYZ;
srcmasks[1] |= RC_MASK_XYZ;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 6e18d6eb3f..d3f639c870 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -64,6 +64,9 @@ typedef enum {
* dst.c = d src0.c / dy */
RC_OPCODE_DDY,
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
+ RC_OPCODE_DP2,
+
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
RC_OPCODE_DP3,
@@ -154,6 +157,9 @@ typedef enum {
/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SNE,
+ /** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */
+ RC_OPCODE_SSG,
+
/** vec4 instruction: dst.c = src0.c - src1.c */
RC_OPCODE_SUB,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index 407a0a55ee..8327e9aced 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -230,6 +230,34 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
}
+static void check_opcode_support(struct r300_fragment_program_compiler *c,
+ struct rc_sub_instruction *inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ if (opcode->HasDstReg) {
+ if (inst->DstReg.RelAddr) {
+ rc_error(&c->Base, "Fragment program does not support relative addressing "
+ "of destination operands.\n");
+ return;
+ }
+
+ if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
+ rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
+ return;
+ }
+ }
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->SrcReg[i].RelAddr) {
+ rc_error(&c->Base, "Fragment program does not support relative addressing "
+ " of source operands.\n");
+ return;
+ }
+ }
+}
+
+
/**
* Translate all ALU instructions into corresponding pair instructions,
* performing no other changes.
@@ -249,6 +277,8 @@ void rc_pair_translate(struct r300_fragment_program_compiler *c)
struct rc_sub_instruction copy = inst->U.I;
+ check_opcode_support(c, &copy);
+
final_rewrite(&copy);
inst->Type = RC_INSTRUCTION_PAIR;
set_pair_instruction(c, &inst->U.P, &copy);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 857aae5514..704a7bb2d2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -216,18 +216,18 @@ static void transform_CEIL(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
-static void transform_DP3(struct radeon_compiler* c,
+static void transform_DP2(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_src_register src0 = inst->U.I.SrcReg[0];
struct rc_src_register src1 = inst->U.I.SrcReg[1];
- src0.Negate &= ~RC_MASK_W;
- src0.Swizzle &= ~(7 << (3 * 3));
- src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
- src1.Negate &= ~RC_MASK_W;
- src1.Swizzle &= ~(7 << (3 * 3));
- src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
- emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+ src0.Swizzle &= ~(63 << (3 * 2));
+ src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+ src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+ src1.Swizzle &= ~(63 << (3 * 2));
+ src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+ emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
@@ -464,6 +464,43 @@ static void transform_SNE(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_SSG(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* result = sign(x)
+ *
+ * CMP tmp0, -x, 1, 0
+ * CMP tmp1, x, 1, 0
+ * ADD result, tmp0, -tmp1;
+ */
+ unsigned tmp0, tmp1;
+
+ /* 0 < x */
+ tmp0 = rc_find_free_temporary(c);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+ dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ negate(inst->U.I.SrcReg[0]),
+ builtin_one,
+ builtin_zero);
+
+ /* x < 0 */
+ tmp1 = rc_find_free_temporary(c);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+ dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ builtin_one,
+ builtin_zero);
+
+ /* Either both are zero, or one of them is one and the other is zero. */
+ /* result = tmp0 - tmp1 */
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp0),
+ negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+ rc_remove_instruction(inst);
+}
+
static void transform_SUB(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -516,6 +553,7 @@ int radeonTransformALU(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
@@ -530,6 +568,7 @@ int radeonTransformALU(
case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
+ case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
@@ -577,6 +616,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_r300_vertex_DP2(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_instruction *next_inst = inst->Next;
+ transform_DP2(c, inst);
+ next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
+}
+
+static void transform_r300_vertex_DP3(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ struct rc_src_register src1 = inst->U.I.SrcReg[1];
+ src0.Negate &= ~RC_MASK_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~RC_MASK_W;
+ src1.Swizzle &= ~(7 << (3 * 3));
+ src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ rc_remove_instruction(inst);
+}
+
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -672,6 +734,41 @@ static void transform_r300_vertex_SLE(struct radeon_compiler* c,
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
+static void transform_r300_vertex_SSG(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* result = sign(x)
+ *
+ * SLT tmp0, 0, x;
+ * SLT tmp1, x, 0;
+ * ADD result, tmp0, -tmp1;
+ */
+ unsigned tmp0, tmp1;
+
+ /* 0 < x */
+ tmp0 = rc_find_free_temporary(c);
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ builtin_zero,
+ inst->U.I.SrcReg[0]);
+
+ /* x < 0 */
+ tmp1 = rc_find_free_temporary(c);
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ builtin_zero);
+
+ /* Either both are zero, or one of them is one and the other is zero. */
+ /* result = tmp0 - tmp1 */
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp0),
+ negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+ rc_remove_instruction(inst);
+}
+
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
@@ -685,7 +782,8 @@ int r300_transform_vertex_alu(
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
- case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
+ case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
+ case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
@@ -705,6 +803,7 @@ int r300_transform_vertex_alu(
return 1;
}
return 0;
+ case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index 9c4b65f4c0..ddce590ee6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -117,8 +117,8 @@ int radeonTransformTEX(
struct rc_instruction * inst_rcp = NULL;
struct rc_instruction * inst_mad;
struct rc_instruction * inst_cmp;
- unsigned tmp_texsample = rc_find_free_temporary(c);
- unsigned tmp_sum = rc_find_free_temporary(c);
+ unsigned tmp_texsample;
+ unsigned tmp_sum;
unsigned tmp_recip_w = 0;
int pass, fail, tex;
@@ -126,6 +126,7 @@ int radeonTransformTEX(
struct rc_dst_register output_reg = inst->U.I.DstReg;
/* Redirect TEX to a new temp. */
+ tmp_texsample = rc_find_free_temporary(c);
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = tmp_texsample;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
@@ -144,6 +145,7 @@ int radeonTransformTEX(
}
/* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
+ tmp_sum = rc_find_free_temporary(c);
inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mad->U.I.DstReg.Index = tmp_sum;
@@ -199,6 +201,8 @@ int radeonTransformTEX(
inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+
+ assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
new file mode 100644
index 0000000000..be89e9fa5b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_remove_constants.h"
+
+void rc_remove_unused_constants(struct radeon_compiler *c,
+ unsigned **out_remap_table)
+{
+ unsigned char *const_used;
+ unsigned *remap_table;
+ unsigned *inv_remap_table;
+ unsigned has_rel_addr = 0;
+ unsigned is_identity = 1;
+ unsigned are_externals_remapped = 0;
+ struct rc_constant *constants = c->Program.Constants.Constants;
+
+ if (!c->Program.Constants.Count) {
+ *out_remap_table = NULL;
+ return;
+ }
+
+ const_used = malloc(c->Program.Constants.Count);
+ memset(const_used, 0, c->Program.Constants.Count);
+
+ /* Pass 1: Mark used constants. */
+ for (struct rc_instruction *inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
+ if (inst->U.I.SrcReg[i].RelAddr) {
+ has_rel_addr = 1;
+ } else {
+ const_used[inst->U.I.SrcReg[i].Index] = 1;
+ }
+ }
+ }
+ }
+
+ /* Pass 2: If there is relative addressing, mark all externals as used. */
+ if (has_rel_addr) {
+ for (unsigned i = 0; i < c->Program.Constants.Count; i++)
+ if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+ const_used[i] = 1;
+ }
+
+ /* Pass 3: Make the remapping table and remap constants.
+ * This pass removes unused constants simply by overwriting them by other constants. */
+ remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+ inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+ unsigned new_count = 0;
+
+ for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
+ if (const_used[i]) {
+ remap_table[new_count] = i;
+ inv_remap_table[i] = new_count;
+
+ if (i != new_count) {
+ if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+ are_externals_remapped = 1;
+
+ constants[new_count] = constants[i];
+ is_identity = 0;
+ }
+ new_count++;
+ }
+ }
+
+ /* is_identity ==> new_count == old_count
+ * !is_identity ==> new_count < old_count */
+ assert( is_identity || new_count < c->Program.Constants.Count);
+ assert(!(has_rel_addr && are_externals_remapped));
+
+ /* Pass 4: Redirect reads of all constants to their new locations. */
+ if (!is_identity) {
+ for (struct rc_instruction *inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
+ inst->U.I.SrcReg[i].Index = inv_remap_table[inst->U.I.SrcReg[i].Index];
+ }
+ }
+ }
+
+ }
+
+ /* Set the new constant count. Note that new_count may be less than
+ * Count even though the remapping function is identity. In that case,
+ * the constants have been removed at the end of the array. */
+ c->Program.Constants.Count = new_count;
+
+ if (are_externals_remapped) {
+ *out_remap_table = remap_table;
+ } else {
+ *out_remap_table = NULL;
+ free(remap_table);
+ }
+
+ free(const_used);
+ free(inv_remap_table);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h
new file mode 100644
index 0000000000..0d3a26ca1c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_REMOVE_CONSTANTS_H
+#define RADEON_REMOVE_CONSTANTS_H
+
+#include "radeon_compiler.h"
+
+void rc_remove_unused_constants(struct radeon_compiler *c,
+ unsigned **out_remap_table);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 95f4306f60..7b6521c748 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -38,7 +38,6 @@
#include "r300_fragprog_common.h"
-#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "compiler/radeon_compiler.h"