summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/compiler
diff options
context:
space:
mode:
authorKeith Whitwell <keithw@vmware.com>2010-10-17 19:03:42 -0700
committerKeith Whitwell <keithw@vmware.com>2010-10-17 19:09:42 -0700
commit0072acd447dc6be652e63752e50215c3105322c8 (patch)
tree847d1763b54772d336a04e606f8248291c3092b7 /src/mesa/drivers/dri/r300/compiler
parent543fb77ddece7e1806e8eaa0d65bb2a945ef9a75 (diff)
parentca2b2ac131933b4171b519813df1aaa3a81621cd (diff)
Merge remote branch 'origin/master' into lp-setup-llvm
Conflicts: src/gallium/drivers/llvmpipe/lp_setup_coef.c src/gallium/drivers/llvmpipe/lp_setup_coef.h src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c src/gallium/drivers/llvmpipe/lp_setup_point.c src/gallium/drivers/llvmpipe/lp_setup_tri.c src/gallium/drivers/llvmpipe/lp_state_derived.c src/gallium/drivers/llvmpipe/lp_state_fs.h
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c30
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c11
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.c14
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c125
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c14
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c29
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c11
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c67
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c15
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c28
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h47
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c339
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c161
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c5
21 files changed, 604 insertions, 398 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 3b2b06fc2b..8be32ea91f 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -74,7 +74,7 @@ static void use_temporary(struct r300_fragment_program_code *code, unsigned int
code->pixsize = index;
}
-static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src)
+static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
{
if (src.File == RC_FILE_CONSTANT) {
return src.Index | (1 << 5);
@@ -133,6 +133,8 @@ static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler
*/
static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
{
+ int ip;
+ int j;
PROG_CODE;
if (code->alu.length >= c->Base.max_alu_insts) {
@@ -140,20 +142,20 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
return 0;
}
- int ip = code->alu.length++;
- int j;
+ ip = code->alu.length++;
code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
unsigned int src = use_source(code, inst->RGB.Src[j]);
+ unsigned int arg;
code->alu.inst[ip].rgb_addr |= src << (6*j);
src = use_source(code, inst->Alpha.Src[j]);
code->alu.inst[ip].alpha_addr |= src << (6*j);
- unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+ arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
code->alu.inst[ip].rgb_inst |= arg << (7*j);
@@ -259,6 +261,10 @@ static int finish_node(struct r300_emit_state * emit)
{
struct r300_fragment_program_compiler * c = emit->compiler;
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
+ unsigned alu_offset;
+ unsigned alu_end;
+ unsigned tex_offset;
+ unsigned tex_end;
if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
@@ -268,10 +274,10 @@ static int finish_node(struct r300_emit_state * emit)
return 0;
}
- unsigned alu_offset = emit->node_first_alu;
- unsigned alu_end = code->alu.length - alu_offset - 1;
- unsigned tex_offset = emit->node_first_tex;
- unsigned tex_end = code->tex.length - tex_offset - 1;
+ alu_offset = emit->node_first_alu;
+ alu_end = code->alu.length - alu_offset - 1;
+ tex_offset = emit->node_first_tex;
+ tex_end = code->tex.length - tex_offset - 1;
if (code->tex.length == emit->node_first_tex) {
if (emit->current_node > 0) {
@@ -334,6 +340,9 @@ static int begin_tex(struct r300_emit_state * emit)
static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
{
+ unsigned int unit;
+ unsigned int dest;
+ unsigned int opcode;
PROG_CODE;
if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
@@ -341,9 +350,8 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
return 0;
}
- unsigned int unit = inst->U.I.TexSrcUnit;
- unsigned int dest = inst->U.I.DstReg.Index;
- unsigned int opcode;
+ unit = inst->U.I.TexSrcUnit;
+ dest = inst->U.I.DstReg.Index;
switch(inst->U.I.Opcode) {
case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index caa48fe478..2d28b06539 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -94,6 +94,9 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
*/
static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
+ unsigned int relevant;
+ int j;
+
if (reg.Abs)
reg.Negate = RC_MASK_NONE;
@@ -101,8 +104,6 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
opcode == RC_OPCODE_TEX ||
opcode == RC_OPCODE_TXB ||
opcode == RC_OPCODE_TXP) {
- int j;
-
if (reg.Abs || reg.Negate)
return 0;
@@ -117,8 +118,7 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
return 1;
}
- unsigned int relevant = 0;
- int j;
+ relevant = 0;
for(j = 0; j < 3; ++j)
if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
@@ -154,9 +154,10 @@ static void r300_swizzle_split(
unsigned int matchcount = 0;
unsigned int matchmask = 0;
for(comp = 0; comp < 3; ++comp) {
+ unsigned int swz;
if (!GET_BIT(mask, comp))
continue;
- unsigned int swz = GET_SWZ(src.Swizzle, comp);
+ swz = GET_SWZ(src.Swizzle, comp);
if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz == GET_SWZ(sd->hash, comp)) {
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 08785716db..d7d49e514b 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -392,9 +392,9 @@ static void ei_if(struct r300_vertex_program_compiler * compiler,
* don't already have one. */
if (!compiler->PredicateMask) {
unsigned int writemasks[RC_REGISTER_MAX_INDEX];
- memset(writemasks, 0, sizeof(writemasks));
struct rc_instruction * inst;
unsigned int i;
+ memset(writemasks, 0, sizeof(writemasks));
for(inst = compiler->Base.Program.Instructions.Next;
inst != &compiler->Base.Program.Instructions;
inst = inst->Next) {
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 627ce374ef..289bb87ae5 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -39,10 +39,12 @@ int r500_transform_IF(
struct rc_instruction * inst,
void* data)
{
+ struct rc_instruction * inst_mov;
+
if (inst->U.I.Opcode != RC_OPCODE_IF)
return 0;
- struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.WriteMask = 0;
inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
@@ -251,12 +253,11 @@ void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r500_fragment_program_code *code = &compiler->code->code.r500;
- fprintf(stderr, "R500 Fragment Program:\n--------\n");
-
int n, i;
uint32_t inst;
uint32_t inst0;
char *str = NULL;
+ fprintf(stderr, "R500 Fragment Program:\n--------\n");
for (n = 0; n < code->inst_end+1; n++) {
inst0 = inst = code->inst[n].inst0;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 54cff9169a..6f101c68eb 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -198,7 +198,7 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int
code->max_temp_idx = index;
}
-static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
{
if (src.File == RC_FILE_CONSTANT) {
return src.Index | 0x100;
@@ -227,6 +227,7 @@ static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
*/
static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
{
+ int ip;
PROG_CODE;
if (code->inst_end >= c->Base.max_alu_insts-1) {
@@ -234,7 +235,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
return;
}
- int ip = ++code->inst_end;
+ ip = ++code->inst_end;
/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
@@ -250,7 +251,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
if (inst->WriteALUResult) {
- error("%s: cannot write output and ALU result at the same time");
+ error("Cannot write output and ALU result at the same time");
return;
}
} else {
@@ -357,6 +358,7 @@ static unsigned int translate_strq_swizzle(unsigned int swizzle)
*/
static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
{
+ int ip;
PROG_CODE;
if (code->inst_end >= c->Base.max_alu_insts-1) {
@@ -364,7 +366,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
return 0;
}
- int ip = ++code->inst_end;
+ ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_TEX
| (inst->DstReg.WriteMask << 11)
@@ -407,12 +409,14 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
{
+ unsigned int newip;
+
if (s->Code->inst_end >= s->C->max_alu_insts-1) {
rc_error(s->C, "emit_tex: Too many instructions");
return;
}
- unsigned int newip = ++s->Code->inst_end;
+ newip = ++s->Code->inst_end;
/* Currently all loops use the same integer constant to intialize
* the loop variables. */
@@ -623,6 +627,8 @@ void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
if (code->inst_end == -1 ||
(code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+ int ip;
+
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= compiler->Base.max_alu_insts-1) {
@@ -630,7 +636,7 @@ void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
return;
}
- int ip = ++code->inst_end;
+ ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
index 0eab18c344..6842fb873b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
@@ -28,6 +28,7 @@
#include "radeon_code.h"
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include "radeon_program.h"
@@ -171,3 +172,16 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
return rc_constants_add(c, &constant);
}
+
+void rc_constants_print(struct rc_constant_list * c)
+{
+ unsigned int i;
+ for(i = 0; i < c->Count; i++) {
+ if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) {
+ float * values = c->Constants[i].u.Immediate;
+ fprintf(stderr, "CONST[%u] = "
+ "{ %10.4f %10.4f %10.4f %10.4f }\n",
+ i, values[0],values[1], values[2], values[3]);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index f76676fae8..cfb6df2cd7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -60,6 +60,7 @@ enum {
RC_STATE_R300_WINDOW_DIMENSION,
RC_STATE_R300_TEXRECT_FACTOR,
+ RC_STATE_R300_TEXSCALE_FACTOR,
RC_STATE_R300_VIEWPORT_SCALE,
RC_STATE_R300_VIEWPORT_OFFSET
};
@@ -89,6 +90,7 @@ unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * cons
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
+void rc_constants_print(struct rc_constant_list * c);
/**
* Compare functions.
@@ -126,11 +128,7 @@ struct r300_fragment_program_external_state {
struct {
/**
* If the sampler is used as a shadow sampler,
- * this field is:
- * 0 - GL_LUMINANCE
- * 1 - GL_INTENSITY
- * 2 - GL_ALPHA
- * depending on the depth texture mode.
+ * this field contains swizzle depending on the depth texture mode.
*/
unsigned depth_texture_swizzle:12;
@@ -150,13 +148,9 @@ struct r300_fragment_program_external_state {
unsigned compare_mode_enabled : 1;
/**
- * If the sampler needs to fake NPOT, this field is set.
- */
- unsigned fake_npot : 1;
-
- /**
- * If the sampler will recieve non-normalized coords,
- * this field is set.
+ * If the sampler will receive non-normalized coords,
+ * this field is set. The scaling factor is given by
+ * RC_STATE_R300_TEXRECT_FACTOR.
*/
unsigned non_normalized_coords : 1;
@@ -166,7 +160,13 @@ struct r300_fragment_program_external_state {
* If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
* will be performed on the coordinates.
*/
- unsigned wrap_mode : 2;
+ unsigned wrap_mode : 3;
+
+ /**
+ * The coords are scaled after applying the wrap mode emulation
+ * and right before texture fetch. The scaling factor is given by
+ * RC_STATE_R300_TEXSCALE_FACTOR. */
+ unsigned clamp_and_scale_before_fetch : 1;
} unit[16];
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index e73700f84a..a27d395587 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -29,49 +29,32 @@
#include "radeon_program.h"
+struct read_write_mask_data {
+ void * UserData;
+ rc_read_write_mask_fn Cb;
+};
+
static void reads_normal_callback(
- rc_read_write_chan_fn cb,
+ void * userdata,
struct rc_instruction * fullinst,
- struct rc_src_register src,
- void * userdata)
+ struct rc_src_register * src)
{
+ struct read_write_mask_data * cb_data = userdata;
unsigned int refmask = 0;
unsigned int chan;
for(chan = 0; chan < 4; chan++) {
- refmask |= 1 << GET_SWZ(src.Swizzle, chan);
+ refmask |= 1 << GET_SWZ(src->Swizzle, chan);
}
refmask &= RC_MASK_XYZW;
- if (refmask)
- cb(userdata, fullinst, src.File, src.Index, refmask);
-
- if (refmask && src.RelAddr)
- cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
-}
-
-static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
-{
- struct rc_sub_instruction * inst = &fullinst->U.I;
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
-
- for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
-
- if (inst->SrcReg[src].File == RC_FILE_NONE)
- return;
+ if (refmask) {
+ cb_data->Cb(cb_data->UserData, fullinst, src->File,
+ src->Index, refmask);
+ }
- if (inst->SrcReg[src].File == RC_FILE_PRESUB) {
- unsigned int i;
- unsigned int srcp_regs = rc_presubtract_src_reg_count(
- inst->PreSub.Opcode);
- for( i = 0; i < srcp_regs; i++) {
- reads_normal_callback(cb, fullinst,
- inst->PreSub.SrcReg[i],
- userdata);
- }
- } else {
- reads_normal_callback(cb, fullinst,
- inst->SrcReg[src], userdata);
- }
+ if (refmask && src->RelAddr) {
+ cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0,
+ RC_MASK_X);
}
}
@@ -112,7 +95,7 @@ static void pair_get_src_refmasks(unsigned int * refmasks,
}
}
-static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
unsigned int refmasks[3] = { 0, 0, 0 };
@@ -143,6 +126,74 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn
}
}
+static void pair_sub_for_all_args(
+ struct rc_instruction * fullinst,
+ struct rc_pair_sub_instruction * sub,
+ rc_pair_read_arg_fn cb,
+ void * userdata)
+{
+ int i;
+ const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+
+ for(i = 0; i < info->NumSrcRegs; i++) {
+ cb(userdata, fullinst, &sub->Arg[i]);
+ }
+}
+
+/* This function calls the callback function (cb) for each source used by
+ * the instruction.
+ * */
+void rc_for_all_reads_src(
+ struct rc_instruction * inst,
+ rc_read_src_fn cb,
+ void * userdata)
+{
+ const struct rc_opcode_info * opcode =
+ rc_get_opcode_info(inst->U.I.Opcode);
+
+ /* This function only works with normal instructions. */
+ if (inst->Type != RC_INSTRUCTION_NORMAL) {
+ assert(0);
+ return;
+ }
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+
+ if (inst->U.I.SrcReg[src].File == RC_FILE_NONE)
+ continue;
+
+ if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) {
+ unsigned int i;
+ unsigned int srcp_regs = rc_presubtract_src_reg_count(
+ inst->U.I.PreSub.Opcode);
+ for( i = 0; i < srcp_regs; i++) {
+ cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]);
+ }
+ } else {
+ cb(userdata, inst, &inst->U.I.SrcReg[src]);
+ }
+ }
+}
+
+/**
+ * This function calls the callback function (cb) for each arg of the RGB and
+ * alpha components.
+ */
+void rc_pair_for_all_reads_arg(
+ struct rc_instruction * inst,
+ rc_pair_read_arg_fn cb,
+ void * userdata)
+{
+ /* This function only works with pair instructions. */
+ if (inst->Type != RC_INSTRUCTION_PAIR) {
+ assert(0);
+ return;
+ }
+
+ pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata);
+ pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata);
+}
+
/**
* Calls a callback function for all register reads.
*
@@ -153,7 +204,11 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
if (inst->Type == RC_INSTRUCTION_NORMAL) {
- reads_normal(inst, cb, userdata);
+ struct read_write_mask_data cb_data;
+ cb_data.UserData = userdata;
+ cb_data.Cb = cb;
+
+ rc_for_all_reads_src(inst, reads_normal_callback, &cb_data);
} else {
reads_pair(inst, cb, userdata);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index 795d9cc2b6..d10ae3c7b7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -33,6 +33,8 @@
struct radeon_compiler;
struct rc_instruction;
struct rc_swizzle_caps;
+struct rc_src_register;
+struct rc_pair_instruction_arg;
/**
@@ -49,6 +51,16 @@ typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * i
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst,
+ struct rc_src_register * src);
+void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
+ void * userdata);
+
+typedef void (*rc_pair_read_arg_fn)(void * userdata,
+ struct rc_instruction * inst, struct rc_pair_instruction_arg * arg);
+void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
+ rc_pair_read_arg_fn cb, void * userdata);
+
typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file * pfile, unsigned int * pindex);
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index 9d17b4772a..87906f37b1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -106,10 +106,12 @@ static void push_loop(struct deadcode_state * s)
static void push_branch(struct deadcode_state * s)
{
+ struct branchinfo * branch;
+
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
s->BranchStackSize, s->BranchStackReserved, 1);
- struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++];
+ branch = &s->BranchStack[s->BranchStackSize++];
branch->HaveElse = 0;
memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
}
@@ -152,6 +154,7 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
struct instruction_state * insts = &s->Instructions[inst->IP];
unsigned int usedmask = 0;
+ unsigned int srcmasks[3];
if (opcode->HasDstReg) {
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
@@ -180,7 +183,6 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction
}
}
- unsigned int srcmasks[3];
rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
@@ -219,6 +221,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
unsigned int nr_instructions;
unsigned has_temp_reladdr_src = 0;
rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
+ unsigned int ip;
/* Give up if there is relative addressing of destination operands. */
for(struct rc_instruction * inst = c->Program.Instructions.Next;
@@ -349,12 +352,14 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
update_instruction(&s, inst);
}
- unsigned int ip = 0;
+ ip = 0;
for(struct rc_instruction * inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions;
inst = inst->Next, ++ip) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
int dead = 1;
+ unsigned int srcmasks[3];
+ unsigned int usemask;
if (!opcode->HasDstReg) {
dead = 0;
@@ -376,8 +381,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
continue;
}
- unsigned int srcmasks[3];
- unsigned int usemask = s.Instructions[ip].WriteMask;
+ usemask = s.Instructions[ip].WriteMask;
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
usemask |= RC_MASK_X;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
index 222e5b7e02..7bede344f3 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
@@ -57,18 +57,21 @@ struct emulate_branch_state {
static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
{
+ struct branch_info * branch;
+ struct rc_instruction * inst_mov;
+
memory_pool_array_reserve(&s->C->Pool, struct branch_info,
s->Branches, s->BranchCount, s->BranchReserved, 1);
DBG("%s\n", __FUNCTION__);
- struct branch_info * branch = &s->Branches[s->BranchCount++];
+ branch = &s->Branches[s->BranchCount++];
memset(branch, 0, sizeof(struct branch_info));
branch->If = inst;
/* Make a safety copy of the decision register, because we will need
* it at ENDIF time and it might be overwritten in both branches. */
- struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
+ inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
@@ -84,6 +87,8 @@ static void handle_if(struct emulate_branch_state * s, struct rc_instruction * i
static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
{
+ struct branch_info * branch;
+
if (!s->BranchCount) {
rc_error(s->C, "Encountered ELSE outside of branches");
return;
@@ -91,7 +96,7 @@ static void handle_else(struct emulate_branch_state * s, struct rc_instruction *
DBG("%s\n", __FUNCTION__);
- struct branch_info * branch = &s->Branches[s->BranchCount - 1];
+ branch = &s->Branches[s->BranchCount - 1];
branch->Else = inst;
}
@@ -191,6 +196,10 @@ static void inject_cmp(struct emulate_branch_state * s,
static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
{
+ struct branch_info * branch;
+ struct register_proxies IfProxies;
+ struct register_proxies ElseProxies;
+
if (!s->BranchCount) {
rc_error(s->C, "Encountered ENDIF outside of branches");
return;
@@ -198,9 +207,7 @@ static void handle_endif(struct emulate_branch_state * s, struct rc_instruction
DBG("%s\n", __FUNCTION__);
- struct branch_info * branch = &s->Branches[s->BranchCount - 1];
- struct register_proxies IfProxies;
- struct register_proxies ElseProxies;
+ branch = &s->Branches[s->BranchCount - 1];
memset(&IfProxies, 0, sizeof(IfProxies));
memset(&ElseProxies, 0, sizeof(ElseProxies));
@@ -261,16 +268,19 @@ static void remap_output_function(void * userdata, struct rc_instruction * inst,
*/
static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
{
+ const struct rc_opcode_info * opcode;
+
if (!s->BranchCount)
return;
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ opcode = rc_get_opcode_info(inst->U.I.Opcode);
if (!opcode->HasDstReg)
return;
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
struct remap_output_data remap;
+ struct rc_instruction * inst_mov;
remap.Output = inst->U.I.DstReg.Index;
remap.Temporary = rc_find_free_temporary(s->C);
@@ -281,7 +291,7 @@ static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruc
rc_remap_registers(inst, &remap_output_function, &remap);
}
- struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
+ inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
inst_mov->U.I.DstReg.Index = remap.Output;
@@ -299,12 +309,13 @@ static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruc
void rc_emulate_branches(struct radeon_compiler *c, void *user)
{
struct emulate_branch_state s;
+ struct rc_instruction * ptr;
memset(&s, 0, sizeof(s));
s.C = c;
/* Untypical loop because we may remove the current instruction */
- struct rc_instruction * ptr = c->Program.Instructions.Next;
+ ptr = c->Program.Instructions.Next;
while(ptr != &c->Program.Instructions) {
struct rc_instruction * inst = ptr;
ptr = ptr->Next;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index cd4fcbabb9..205eecd112 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -386,8 +386,6 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
case RC_OPCODE_SNE:
break;
default:
- rc_error(c, "%s: expected conditional",
- __FUNCTION__);
return 0;
}
loop->Cond = loop->If->Prev;
@@ -431,8 +429,10 @@ static int transform_loop(struct emulate_loop_state * s,
loop = &s->Loops[s->LoopCount++];
- if (!build_loop_info(s->C, loop, inst))
+ if (!build_loop_info(s->C, loop, inst)) {
+ rc_error(s->C, "Failed to build loop info\n");
return 0;
+ }
if(try_unroll_loop(s->C, loop)){
return 1;
@@ -511,11 +511,12 @@ void rc_emulate_loops(struct radeon_compiler *c, void *user)
* loops are unrolled first.
*/
for( i = s->LoopCount - 1; i >= 0; i-- ){
+ unsigned int iterations;
+
if(!s->Loops[i].EndLoop){
continue;
}
- unsigned int iterations = loop_max_possible_iterations(
- s->C, &s->Loops[i]);
+ iterations = loop_max_possible_iterations(s->C, &s->Loops[i]);
unroll_loop(s->C, &s->Loops[i], iterations);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index c15a9b1c45..41769e347e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -163,7 +163,8 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
inst_mov->U.I.DstReg.RelAddr ||
- inst_mov->U.I.WriteALUResult)
+ inst_mov->U.I.WriteALUResult ||
+ inst_mov->U.I.SaturateMode)
return;
memset(&s, 0, sizeof(s));
@@ -410,27 +411,34 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ struct rc_constant * constant;
+ struct rc_src_register newsrc;
+ int have_real_reference;
+
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
inst->U.I.SrcReg[src].RelAddr ||
inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
continue;
- struct rc_constant * constant =
+ constant =
&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
if (constant->Type != RC_CONSTANT_IMMEDIATE)
continue;
- struct rc_src_register newsrc = inst->U.I.SrcReg[src];
- int have_real_reference = 0;
+ newsrc = inst->U.I.SrcReg[src];
+ have_real_reference = 0;
for(unsigned int chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+ unsigned int newswz;
+ float imm;
+ float baseimm;
+
if (swz >= 4)
continue;
- unsigned int newswz;
- float imm = constant->u.Immediate[swz];
- float baseimm = imm;
+ imm = constant->u.Immediate[swz];
+ baseimm = imm;
if (imm < 0.0)
baseimm = -baseimm;
@@ -475,8 +483,8 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
}
/**
- * This function returns a writemask that indicates wich components are
- * read by src and also written by dst.
+ * If src and dst use the same register, this function returns a writemask that
+ * indicates wich components are read by src. Otherwise zero is returned.
*/
static unsigned int src_reads_dst_mask(struct rc_src_register src,
struct rc_dst_register dst)
@@ -563,10 +571,18 @@ static int presub_helper(
* s->Inst->U.I.DstReg, because if it does we must not
* remove s->Inst. */
for(i = 0; i < info->NumSrcRegs; i++) {
- if(s->Inst->U.I.DstReg.WriteMask !=
- src_reads_dst_mask(inst->U.I.SrcReg[i],
- s->Inst->U.I.DstReg)) {
- continue;
+ unsigned int mask = src_reads_dst_mask(
+ inst->U.I.SrcReg[i], s->Inst->U.I.DstReg);
+ /* XXX We could be more aggressive here using
+ * presubtract. It is okay if SrcReg[i] only reads
+ * from some of the mask components. */
+ if(s->Inst->U.I.DstReg.WriteMask != mask) {
+ if (s->Inst->U.I.DstReg.WriteMask & mask) {
+ can_remove = 0;
+ break;
+ } else {
+ continue;
+ }
}
if (cant_sub || !can_use_presub) {
can_remove = 0;
@@ -626,6 +642,21 @@ static void presub_replace_add(struct peephole_state *s,
inst->U.I.SrcReg[src_index].Index = presub_opcode;
}
+static int is_presub_candidate(struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int i;
+
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
+ return 0;
+
+ for(i = 0; i < info->NumSrcRegs; i++) {
+ if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg))
+ return 0;
+ }
+ return 1;
+}
+
static int peephole_add_presub_add(
struct radeon_compiler * c,
struct rc_instruction * inst_add)
@@ -635,10 +666,7 @@ static int peephole_add_presub_add(
unsigned int i;
struct peephole_state s;
- if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
- return 0;
-
- if (inst_add->U.I.SaturateMode)
+ if (!is_presub_candidate(inst_add))
return 0;
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
@@ -705,10 +733,7 @@ static int peephole_add_presub_inv(
unsigned int i, swz, mask;
struct peephole_state s;
- if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
- return 0;
-
- if (inst_add->U.I.SaturateMode)
+ if (!is_presub_candidate(inst_add))
return 0;
mask = inst_add->U.I.DstReg.WriteMask;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index c73845512f..91524f5ec6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -322,12 +322,13 @@ void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user)
{
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
struct regalloc_state s;
+ int temp_reg_offset;
compute_live_intervals(cc, &s);
c->AllocateHwInputs(c, &alloc_input, &s);
- int temp_reg_offset = 0;
+ temp_reg_offset = 0;
for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index)
temp_reg_offset = s.Input[i].Index + 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index 5269d65985..bfe3553460 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -237,11 +237,12 @@ static void commit_alu_instruction(struct schedule_state * s, struct schedule_in
static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
{
struct schedule_instruction *readytex;
+ struct rc_instruction * inst_begin;
assert(s->ReadyTEX);
/* Node marker for R300 */
- struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev);
+ inst_begin = rc_insert_new_instruction(s->C, before->Prev);
inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
/* Link texture instructions back in */
@@ -280,6 +281,8 @@ static int destructive_merge_instructions(
struct rc_pair_instruction * alpha)
{
const struct rc_opcode_info * opcode;
+ const struct rc_opcode_info * rgb_info;
+
assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
@@ -288,8 +291,7 @@ static int destructive_merge_instructions(
* src1. */
/* Merge the rgb presubtract registers. */
- const struct rc_opcode_info * rgb_info =
- rc_get_opcode_info(rgb->RGB.Opcode);
+ rgb_info = rc_get_opcode_info(rgb->RGB.Opcode);
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
unsigned int srcp_src;
unsigned int srcp_regs;
@@ -301,9 +303,9 @@ static int destructive_merge_instructions(
unsigned int arg;
int free_source;
unsigned int one_way = 0;
- struct radeon_pair_instruction_source srcp =
+ struct rc_pair_instruction_source srcp =
alpha->RGB.Src[srcp_src];
- struct radeon_pair_instruction_source temp;
+ struct rc_pair_instruction_source temp;
/* 2nd arg of 1 means this is an rgb source.
* 3rd arg of 0 means this is not an alpha source. */
free_source = rc_pair_alloc_source(rgb, 1, 0,
@@ -366,9 +368,9 @@ static int destructive_merge_instructions(
unsigned int arg;
int free_source;
unsigned int one_way = 0;
- struct radeon_pair_instruction_source srcp =
+ struct rc_pair_instruction_source srcp =
alpha->Alpha.Src[srcp_src];
- struct radeon_pair_instruction_source temp;
+ struct rc_pair_instruction_source temp;
/* 2nd arg of 0 means this is not an rgb source.
* 3rd arg of 1 means this is an alpha source. */
free_source = rc_pair_alloc_source(rgb, 0, 1,
@@ -424,6 +426,7 @@ static int destructive_merge_instructions(
unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
rc_register_file file = 0;
unsigned int index = 0;
+ int source;
if (alpha->Alpha.Arg[arg].Swizzle < 3) {
srcrgb = 1;
@@ -435,7 +438,7 @@ static int destructive_merge_instructions(
index = alpha->Alpha.Src[oldsrc].Index;
}
- int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+ source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
if (source < 0)
return 0;
@@ -475,6 +478,12 @@ static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_i
{
struct rc_pair_instruction backup;
+ /*Instructions can't write output registers and ALU result at the
+ * same time. */
+ if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
+ || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
+ return 0;
+ }
memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
if (destructive_merge_instructions(rgb, alpha))
@@ -597,6 +606,7 @@ static void scan_read(void * data, struct rc_instruction * inst,
{
struct schedule_state * s = data;
struct reg_value * v = get_reg_value(s, file, index, chan);
+ struct reg_value_reader * reader;
if (!v)
return;
@@ -610,7 +620,7 @@ static void scan_read(void * data, struct rc_instruction * inst,
DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
- struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+ reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
reader->Reader = s->Current;
reader->Next = v->Readers;
v->Readers = reader;
@@ -630,13 +640,14 @@ static void scan_write(void * data, struct rc_instruction * inst,
{
struct schedule_state * s = data;
struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+ struct reg_value * newv;
if (!pv)
return;
DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
- struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
+ newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
memset(newv, 0, sizeof(*newv));
newv->Writer = s->Current;
@@ -659,12 +670,13 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
struct rc_instruction * begin, struct rc_instruction * end)
{
struct schedule_state s;
+ unsigned int ip;
memset(&s, 0, sizeof(s));
s.C = &c->Base;
/* Scan instructions for data dependencies */
- unsigned int ip = 0;
+ ip = 0;
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
memset(s.Current, 0, sizeof(struct schedule_instruction));
@@ -716,12 +728,14 @@ void rc_pair_schedule(struct radeon_compiler *cc, void *user)
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
struct rc_instruction * inst = c->Base.Program.Instructions.Next;
while(inst != &c->Base.Program.Instructions) {
+ struct rc_instruction * first;
+
if (is_controlflow(inst)) {
inst = inst->Next;
continue;
}
- struct rc_instruction * first = inst;
+ first = inst;
while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
inst = inst->Next;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index ff82584466..c549be5218 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -148,9 +148,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
struct rc_pair_instruction * pair,
struct rc_sub_instruction * inst)
{
+ int needrgb, needalpha, istranscendent;
+ const struct rc_opcode_info * opcode;
+ int i;
+
memset(pair, 0, sizeof(struct rc_pair_instruction));
- int needrgb, needalpha, istranscendent;
classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
if (needrgb) {
@@ -167,8 +170,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
pair->Alpha.Saturate = 1;
}
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
- int i;
+ opcode = rc_get_opcode_info(inst->Opcode);
/* Presubtract handling:
* We need to make sure that the values used by the presubtract
@@ -330,15 +332,18 @@ void rc_pair_translate(struct radeon_compiler *cc, void *user)
for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
inst != &c->Base.Program.Instructions;
inst = inst->Next) {
+ const struct rc_opcode_info * opcode;
+ struct rc_sub_instruction copy;
+
if (inst->Type != RC_INSTRUCTION_NORMAL)
continue;
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ opcode = rc_get_opcode_info(inst->U.I.Opcode);
if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
continue;
- struct rc_sub_instruction copy = inst->U.I;
+ copy = inst->U.I;
check_opcode_support(c, &copy);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 4d3e26f28c..39408845d5 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -874,13 +874,15 @@ int r300_transform_trig_simple(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
+ unsigned int constants[2];
+ unsigned int tempreg;
+
if (inst->U.I.Opcode != RC_OPCODE_COS &&
inst->U.I.Opcode != RC_OPCODE_SIN &&
inst->U.I.Opcode != RC_OPCODE_SCS)
return 0;
- unsigned int constants[2];
- unsigned int tempreg = rc_find_free_temporary(c);
+ tempreg = rc_find_free_temporary(c);
sincos_constants(c, constants);
@@ -918,6 +920,8 @@ int r300_transform_trig_simple(struct radeon_compiler* c,
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
} else {
+ struct rc_dst_register dst;
+
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
swizzle_xxxx(inst->U.I.SrcReg[0]),
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
@@ -929,7 +933,7 @@ int r300_transform_trig_simple(struct radeon_compiler* c,
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
- struct rc_dst_register dst = inst->U.I.DstReg;
+ dst = inst->U.I.DstReg;
dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
sin_approx(c, inst, dst,
@@ -988,16 +992,16 @@ int radeonTransformTrigScale(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
- if (inst->U.I.Opcode != RC_OPCODE_COS &&
- inst->U.I.Opcode != RC_OPCODE_SIN &&
- inst->U.I.Opcode != RC_OPCODE_SCS)
- return 0;
-
static const float RCP_2PI = 0.15915494309189535;
unsigned int temp;
unsigned int constant;
unsigned int constant_swizzle;
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
+
temp = rc_find_free_temporary(c);
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
@@ -1020,6 +1024,10 @@ int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
struct rc_instruction *inst,
void *unused)
{
+ static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+ unsigned int temp;
+ unsigned int constant;
+
if (inst->U.I.Opcode != RC_OPCODE_COS &&
inst->U.I.Opcode != RC_OPCODE_SIN &&
inst->U.I.Opcode != RC_OPCODE_SCS)
@@ -1030,10 +1038,6 @@ int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
* repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
*/
- static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
- unsigned int temp;
- unsigned int constant;
-
temp = rc_find_free_temporary(c);
constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index e0061e454b..01cdb15424 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -55,52 +55,35 @@ struct radeon_compiler;
*/
#define RC_PAIR_PRESUB_SRC 3
-struct radeon_pair_instruction_source {
+struct rc_pair_instruction_source {
unsigned int Used:1;
unsigned int File:3;
unsigned int Index:RC_REGISTER_INDEX_BITS;
};
-struct radeon_pair_instruction_rgb {
- unsigned int Opcode:8;
- unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
- unsigned int WriteMask:3;
- unsigned int Target:2;
- unsigned int OutputWriteMask:3;
- unsigned int Saturate:1;
-
- struct radeon_pair_instruction_source Src[4];
-
- struct {
- unsigned int Source:2;
- unsigned int Swizzle:9;
- unsigned int Abs:1;
- unsigned int Negate:1;
- } Arg[3];
+struct rc_pair_instruction_arg {
+ unsigned int Source:2;
+ unsigned int Swizzle:9;
+ unsigned int Abs:1;
+ unsigned int Negate:1;
};
-struct radeon_pair_instruction_alpha {
+struct rc_pair_sub_instruction {
unsigned int Opcode:8;
unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
- unsigned int WriteMask:1;
- unsigned int Target:2;
- unsigned int OutputWriteMask:1;
+ unsigned int WriteMask:3;
+ unsigned int Target:2;
+ unsigned int OutputWriteMask:3;
unsigned int DepthWriteMask:1;
unsigned int Saturate:1;
- struct radeon_pair_instruction_source Src[4];
-
- struct {
- unsigned int Source:2;
- unsigned int Swizzle:3;
- unsigned int Abs:1;
- unsigned int Negate:1;
- } Arg[3];
+ struct rc_pair_instruction_source Src[4];
+ struct rc_pair_instruction_arg Arg[3];
};
struct rc_pair_instruction {
- struct radeon_pair_instruction_rgb RGB;
- struct radeon_pair_instruction_alpha Alpha;
+ struct rc_pair_sub_instruction RGB;
+ struct rc_pair_sub_instruction Alpha;
unsigned int WriteALUResult:2;
unsigned int ALUResultCompare:3;
@@ -108,7 +91,7 @@ struct rc_pair_instruction {
};
typedef void (*rc_pair_foreach_src_fn)
- (void *, struct radeon_pair_instruction_source *);
+ (void *, struct rc_pair_instruction_source *);
typedef enum {
RC_PAIR_SOURCE_NONE = 0,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index ddce590ee6..530afa5e08 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2010 Corbin Simpson
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
*
* All Rights Reserved.
*
@@ -46,31 +47,61 @@ static struct rc_src_register shadow_ambient(struct r300_fragment_program_compil
return reg;
}
-static void lower_texture_rect(struct r300_fragment_program_compiler *compiler,
- struct rc_instruction *inst)
+static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
+ struct rc_instruction *inst,
+ unsigned state_constant)
{
- struct rc_instruction *inst_rect;
+ struct rc_instruction *inst_mov;
+
unsigned temp = rc_find_free_temporary(&compiler->Base);
- if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
- compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) {
- inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+ inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);
- inst_rect->U.I.Opcode = RC_OPCODE_MUL;
- inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_rect->U.I.DstReg.Index = temp;
- inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
- inst_rect->U.I.SrcReg[1].Index =
- rc_constants_add_state(&compiler->Base.Program.Constants,
- RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
+ inst_mov->U.I.Opcode = RC_OPCODE_MUL;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mov->U.I.SrcReg[1].Index =
+ rc_constants_add_state(&compiler->Base.Program.Constants,
+ state_constant, inst->U.I.TexSrcUnit);
- reset_srcreg(&inst->U.I.SrcReg[0]);
- inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst->U.I.SrcReg[0].Index = temp;
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
+}
- inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
- }
+static void projective_divide(struct r300_fragment_program_compiler *compiler,
+ struct rc_instruction *inst)
+{
+ struct rc_instruction *inst_mul, *inst_rcp;
+
+ unsigned temp = rc_find_free_temporary(&compiler->Base);
+
+ inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = temp;
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ /* Because the input can be arbitrarily swizzled,
+ * read the component mapped to W. */
+ inst_rcp->U.I.SrcReg[0].Swizzle =
+ RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
+
+ inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = temp;
+ inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.SrcReg[1].Index = temp;
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.Opcode = RC_OPCODE_TEX;
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
}
/**
@@ -88,6 +119,9 @@ int radeonTransformTEX(
{
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
+ rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
+ int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;
if (inst->U.I.Opcode != RC_OPCODE_TEX &&
inst->U.I.Opcode != RC_OPCODE_TXB &&
@@ -141,16 +175,18 @@ int radeonTransformTEX(
inst_rcp->U.I.DstReg.Index = tmp_recip_w;
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+ inst_rcp->U.I.SrcReg[0].Swizzle =
+ RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
}
- /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
+ /* Perspective-divide Z by W (if it's TXP) and add the texture sample (see below). */
tmp_sum = rc_find_free_temporary(c);
inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mad->U.I.DstReg.Index = tmp_sum;
inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
+ inst_mad->U.I.SrcReg[0].Swizzle =
+ RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
if (inst->U.I.Opcode == RC_OPCODE_TXP) {
inst_mad->U.I.Opcode = RC_OPCODE_MAD;
inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
@@ -206,11 +242,22 @@ int radeonTransformTEX(
}
}
- /* Texture wrap modes don't work on NPOT textures or texrects.
- *
- * The game plan is simple. We have two flags, fake_npot and
- * non_normalized_coords, as well as a tex target. The RECT tex target
- * will make the emitted code use non-scaled texcoords.
+ /* R300 cannot sample from rectangles and the wrap mode fallback needs
+ * normalized coordinates anyway. */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
+ scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
+ inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+ }
+
+ /* Divide by W if needed. */
+ if (inst->U.I.Opcode == RC_OPCODE_TXP &&
+ (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
+ projective_divide(compiler, inst);
+ }
+
+ /* Texture wrap modes don't work on NPOT textures.
*
* Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
* mirroring are not. If we need to repeat, we do:
@@ -235,128 +282,140 @@ int radeonTransformTEX(
* ~ C & M. ;)
*/
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
- (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
- compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot ||
- compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) {
- rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
-
- /* R300 cannot sample from rectangles. */
- if (!c->is_r500) {
- lower_texture_rect(compiler, inst);
- }
-
- if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot &&
- wrapmode != RC_WRAP_NONE) {
+ wrapmode != RC_WRAP_NONE) {
+ struct rc_instruction *inst_mov;
+ unsigned temp = rc_find_free_temporary(c);
+
+ if (wrapmode == RC_WRAP_REPEAT) {
+ /* Both instructions will be paired up. */
+ struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+ inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.DstReg.Index = temp;
+ inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
+ /*
+ * Function:
+ * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
+ *
+ * Code:
+ * MUL temp, src0, 0.5
+ * FRC temp, temp
+ * MAD temp, temp, 2, -1
+ * ADD temp, 1, -abs(temp)
+ */
+
+ struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
+ unsigned two, two_swizzle;
+
+ inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = temp;
+ inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
+
+ inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+ inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.DstReg.Index = temp;
+ inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.SrcReg[0].Index = temp;
+ inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+ two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+ inst_mad = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = temp;
+ inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[0].Index = temp;
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[1].Index = two;
+ inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
+ inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
+
+ inst_add = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_add->U.I.Opcode = RC_OPCODE_ADD;
+ inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_add->U.I.DstReg.Index = temp;
+ inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+ inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_add->U.I.SrcReg[1].Index = temp;
+ inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+ inst_add->U.I.SrcReg[1].Abs = 1;
+ inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
+ } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
+ /*
+ * Mirrored clamp modes are bloody simple, we just use abs
+ * to mirror [0, 1] into [-1, 0]. This works for
+ * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
+ */
struct rc_instruction *inst_mov;
- unsigned temp = rc_find_free_temporary(c);
-
- /* For NPOT fallback, we need normalized coordinates anyway. */
- if (c->is_r500) {
- lower_texture_rect(compiler, inst);
- }
-
- if (wrapmode == RC_WRAP_REPEAT) {
- /* Both instructions will be paired up. */
- struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
-
- inst_frc->U.I.Opcode = RC_OPCODE_FRC;
- inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_frc->U.I.DstReg.Index = temp;
- inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
- /*
- * Function:
- * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
- *
- * Code:
- * MUL temp, src0, 0.5
- * FRC temp, temp
- * MAD temp, temp, 2, -1
- * ADD temp, 1, -abs(temp)
- */
-
- struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
- unsigned two, two_swizzle;
-
- inst_mul = rc_insert_new_instruction(c, inst->Prev);
-
- inst_mul->U.I.Opcode = RC_OPCODE_MUL;
- inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mul->U.I.DstReg.Index = temp;
- inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
-
- inst_frc = rc_insert_new_instruction(c, inst->Prev);
-
- inst_frc->U.I.Opcode = RC_OPCODE_FRC;
- inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_frc->U.I.DstReg.Index = temp;
- inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst_frc->U.I.SrcReg[0].Index = temp;
- inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
-
- two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
- inst_mad = rc_insert_new_instruction(c, inst->Prev);
-
- inst_mad->U.I.Opcode = RC_OPCODE_MAD;
- inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mad->U.I.DstReg.Index = temp;
- inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst_mad->U.I.SrcReg[0].Index = temp;
- inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
- inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
- inst_mad->U.I.SrcReg[1].Index = two;
- inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
- inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
- inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
-
- inst_add = rc_insert_new_instruction(c, inst->Prev);
-
- inst_add->U.I.Opcode = RC_OPCODE_ADD;
- inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_add->U.I.DstReg.Index = temp;
- inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
- inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
- inst_add->U.I.SrcReg[1].Index = temp;
- inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
- inst_add->U.I.SrcReg[1].Abs = 1;
- inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
- } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
- /*
- * Mirrored clamp modes are bloody simple, we just use abs
- * to mirror [0, 1] into [-1, 0]. This works for
- * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
- */
- struct rc_instruction *inst_mov;
-
- inst_mov = rc_insert_new_instruction(c, inst->Prev);
-
- inst_mov->U.I.Opcode = RC_OPCODE_MOV;
- inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mov->U.I.DstReg.Index = temp;
- inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_mov->U.I.SrcReg[0].Abs = 1;
- }
- /* Preserve W for TXP/TXB. */
inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = temp;
- inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
-
- reset_srcreg(&inst->U.I.SrcReg[0]);
- inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst->U.I.SrcReg[0].Index = temp;
+ inst_mov->U.I.SrcReg[0].Abs = 1;
}
+
+ /* Preserve W for TXP/TXB. */
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
+ }
+
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
+ struct rc_instruction *inst_mov;
+ unsigned temp = rc_find_free_temporary(c);
+
+ /* Saturate XYZ. */
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ /* Copy W. */
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
+
+ scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
}
/* Cannot write texture to output registers (all chips) or with masks (non-r500) */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
index d111319d3d..d6c808ad81 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
@@ -28,107 +28,105 @@
#include "radeon_remove_constants.h"
#include "radeon_dataflow.h"
+struct mark_used_data {
+ unsigned char * const_used;
+ unsigned * has_rel_addr;
+};
+
static void remap_regs(void * userdata, struct rc_instruction * inst,
rc_register_file * pfile, unsigned int * pindex)
{
- unsigned *inv_remap_table = userdata;
+ unsigned *inv_remap_table = userdata;
+
+ if (*pfile == RC_FILE_CONSTANT) {
+ *pindex = inv_remap_table[*pindex];
+ }
+}
+
+static void mark_used(void * userdata, struct rc_instruction * inst,
+ struct rc_src_register * src)
+{
+ struct mark_used_data * d = userdata;
- if (*pfile == RC_FILE_CONSTANT) {
- *pindex = inv_remap_table[*pindex];
- }
+ if (src->File == RC_FILE_CONSTANT) {
+ if (src->RelAddr) {
+ *d->has_rel_addr = 1;
+ } else {
+ d->const_used[src->Index] = 1;
+ }
+ }
}
void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
{
unsigned **out_remap_table = (unsigned**)user;
- unsigned char *const_used;
- unsigned *remap_table;
- unsigned *inv_remap_table;
- unsigned has_rel_addr = 0;
- unsigned is_identity = 1;
+ unsigned char *const_used;
+ unsigned *remap_table;
+ unsigned *inv_remap_table;
+ unsigned has_rel_addr = 0;
+ unsigned is_identity = 1;
unsigned are_externals_remapped = 0;
- struct rc_constant *constants = c->Program.Constants.Constants;
+ struct rc_constant *constants = c->Program.Constants.Constants;
+ struct mark_used_data d;
+ unsigned new_count;
+
+ if (!c->Program.Constants.Count) {
+ *out_remap_table = NULL;
+ return;
+ }
- if (!c->Program.Constants.Count) {
- *out_remap_table = NULL;
- return;
- }
+ const_used = malloc(c->Program.Constants.Count);
+ memset(const_used, 0, c->Program.Constants.Count);
- const_used = malloc(c->Program.Constants.Count);
- memset(const_used, 0, c->Program.Constants.Count);
+ d.const_used = const_used;
+ d.has_rel_addr = &has_rel_addr;
/* Pass 1: Mark used constants. */
- for (struct rc_instruction *inst = c->Program.Instructions.Next;
- inst != &c->Program.Instructions; inst = inst->Next) {
- const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
-
- /* XXX: This loop and the if statement after it should be
- * replaced by a call to one of the rc_for_all_reads_* functions.
- * The reason it does not use one of those functions now is
- * because none of them have RelAddr as an argument. */
- for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
- if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
- if (inst->U.I.SrcReg[i].RelAddr) {
- has_rel_addr = 1;
- } else {
- const_used[inst->U.I.SrcReg[i].Index] = 1;
- }
- }
- }
- if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
- unsigned int i;
- unsigned int srcp_regs = rc_presubtract_src_reg_count(
- inst->U.I.PreSub.Opcode);
- for( i = 0; i < srcp_regs; i++) {
- if (inst->U.I.PreSub.SrcReg[i].File ==
- RC_FILE_CONSTANT) {
- const_used[
- inst->U.I.PreSub.SrcReg[i].Index] = 1;
- }
- }
- }
- }
+ for (struct rc_instruction *inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+ rc_for_all_reads_src(inst, mark_used, &d);
+ }
- /* Pass 2: If there is relative addressing, mark all externals as used. */
- if (has_rel_addr) {
- for (unsigned i = 0; i < c->Program.Constants.Count; i++)
- if (constants[i].Type == RC_CONSTANT_EXTERNAL)
- const_used[i] = 1;
- }
+ /* Pass 2: If there is relative addressing, mark all externals as used. */
+ if (has_rel_addr) {
+ for (unsigned i = 0; i < c->Program.Constants.Count; i++)
+ if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+ const_used[i] = 1;
+ }
- /* Pass 3: Make the remapping table and remap constants.
+ /* Pass 3: Make the remapping table and remap constants.
* This pass removes unused constants simply by overwriting them by other constants. */
- remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
- inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
- unsigned new_count = 0;
+ remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+ inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+ new_count = 0;
- for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
- if (const_used[i]) {
- remap_table[new_count] = i;
- inv_remap_table[i] = new_count;
+ for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
+ if (const_used[i]) {
+ remap_table[new_count] = i;
+ inv_remap_table[i] = new_count;
- if (i != new_count) {
+ if (i != new_count) {
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
are_externals_remapped = 1;
constants[new_count] = constants[i];
- is_identity = 0;
- }
- new_count++;
- }
- }
+ is_identity = 0;
+ }
+ new_count++;
+ }
+ }
/* is_identity ==> new_count == old_count
* !is_identity ==> new_count < old_count */
assert( is_identity || new_count < c->Program.Constants.Count);
assert(!(has_rel_addr && are_externals_remapped));
- /* Pass 4: Redirect reads of all constants to their new locations. */
- if (!is_identity) {
- for (struct rc_instruction *inst = c->Program.Instructions.Next;
- inst != &c->Program.Instructions; inst = inst->Next) {
- rc_remap_registers(inst, remap_regs, inv_remap_table);
- }
+ /* Pass 4: Redirect reads of all constants to their new locations. */
+ if (!is_identity) {
+ for (struct rc_instruction *inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+ rc_remap_registers(inst, remap_regs, inv_remap_table);
+ }
}
@@ -138,12 +136,15 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
c->Program.Constants.Count = new_count;
if (are_externals_remapped) {
- *out_remap_table = remap_table;
- } else {
- *out_remap_table = NULL;
- free(remap_table);
- }
-
- free(const_used);
- free(inv_remap_table);
+ *out_remap_table = remap_table;
+ } else {
+ *out_remap_table = NULL;
+ free(remap_table);
+ }
+
+ free(const_used);
+ free(inv_remap_table);
+
+ if (c->Debug)
+ rc_constants_print(&c->Program.Constants);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
index 31d25f9ab8..60e228be5b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
@@ -104,13 +104,14 @@ void rc_rename_regs(struct radeon_compiler *c, void *user)
inst != &c->Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * info;
+ unsigned int old_index, temp_index;
+ struct rc_dst_register * dst;
if(inst->Type != RC_INSTRUCTION_NORMAL) {
rc_error(c, "%s only works with normal instructions.",
__FUNCTION__);
return;
}
- unsigned int old_index, temp_index;
- struct rc_dst_register * dst = &inst->U.I.DstReg;
+ dst = &inst->U.I.DstReg;
info = rc_get_opcode_info(inst->U.I.Opcode);
if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) {
continue;