summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c25
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c103
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h23
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c207
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h31
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c381
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c73
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c31
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c364
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.c115
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h53
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c165
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c50
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_print.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c103
-rw-r--r--src/mesa/drivers/dri/r300/r300_texstate.c21
31 files changed, 1330 insertions, 538 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 8be32ea91f..1db8678e89 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -76,6 +76,9 @@ static void use_temporary(struct r300_fragment_program_code *code, unsigned int
static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
{
+ if (!src.Used)
+ return 0;
+
if (src.File == RC_FILE_CONSTANT) {
return src.Index | (1 << 5);
} else if (src.File == RC_FILE_TEMPORARY) {
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index 2d28b06539..05d3da8a10 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -94,6 +94,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
*/
static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
+ const struct swizzle_data* sd;
unsigned int relevant;
int j;
@@ -127,7 +128,8 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
return 0;
- if (!lookup_native_swizzle(reg.Swizzle))
+ sd = lookup_native_swizzle(reg.Swizzle);
+ if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0))
return 0;
return 1;
@@ -201,7 +203,7 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
{
const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
- if (!sd) {
+ if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) {
fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
return 0;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 2f130198d3..e0d349b98c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -24,6 +24,7 @@
#include <stdio.h>
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
@@ -54,6 +55,8 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
struct rc_sub_instruction * inst = &rci->U.I;
+ unsigned i;
+ const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode);
if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
continue;
@@ -65,27 +68,12 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
continue;
}
- switch (inst->Opcode) {
- case RC_OPCODE_FRC:
- case RC_OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
- break;
- case RC_OPCODE_ADD:
- case RC_OPCODE_MAX:
- case RC_OPCODE_MIN:
- case RC_OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
- break;
- case RC_OPCODE_CMP:
- case RC_OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]);
- break;
- default:
- // Scalar instructions needn't be reswizzled
- break;
+ if (!info->IsComponentwise) {
+ continue;
+ }
+
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]);
}
}
}
@@ -93,7 +81,6 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
int is_r500 = c->Base.is_r500;
- int kill_consts = c->Base.remove_unused_constants;
int opt = !c->Base.disable_optimizations;
/* Lists of instruction transformations. */
@@ -133,11 +120,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
- {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
+ {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
/* This pass makes it easier for the scheduler to group TEX
* instructions and reduces the chances of creating too
* many texture indirections.*/
- {"register rename", 1, !is_r500, rc_rename_regs, NULL},
+ {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, NULL},
{"register allocation", 1, opt, rc_pair_regalloc, NULL},
@@ -150,9 +137,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{NULL, 0, 0, NULL, NULL}
};
+ c->Base.type = RC_FRAGMENT_PROGRAM;
c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;
- rc_run_compiler(&c->Base, fs_list, "Fragment Program");
+ rc_run_compiler(&c->Base, fs_list);
rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index bf8341f017..472029f63d 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -26,6 +26,7 @@
#include "../r300_reg.h"
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
@@ -790,19 +791,14 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
if (!hwtemps[j])
break;
}
- if (j >= c->max_temp_regs) {
- rc_error(c, "Too many temporaries\n");
- return;
+ ta[orig].Allocated = 1;
+ if (last_inst_src_reladdr &&
+ last_inst_src_reladdr->IP > inst->IP) {
+ ta[orig].HwTemp = orig;
} else {
- ta[orig].Allocated = 1;
- if (last_inst_src_reladdr &&
- last_inst_src_reladdr->IP > inst->IP) {
- ta[orig].HwTemp = orig;
- } else {
- ta[orig].HwTemp = j;
- }
- hwtemps[ta[orig].HwTemp] = 1;
+ ta[orig].HwTemp = j;
}
+ hwtemps[ta[orig].HwTemp] = 1;
}
inst->U.I.DstReg.Index = ta[orig].HwTemp;
@@ -1018,7 +1014,6 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{
int is_r500 = c->Base.is_r500;
- int kill_consts = c->Base.remove_unused_constants;
int opt = !c->Base.disable_optimizations;
/* Lists of instruction transformations. */
@@ -1062,18 +1057,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{"dataflow optimize", 1, opt, rc_optimize, NULL},
/* This pass must be done after optimizations. */
{"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts},
- {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"register allocation", 1, opt, allocate_temporary_registers, NULL},
- {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
+ {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, 1, translate_vertex_program, NULL},
{"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL},
{NULL, 0, 0, NULL, NULL}
};
+ c->Base.type = RC_VERTEX_PROGRAM;
c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
- rc_run_compiler(&c->Base, vs_list, "Vertex Program");
+ rc_run_compiler(&c->Base, vs_list);
c->code->InputsRead = c->Base.Program.InputsRead;
c->code->OutputsWritten = c->Base.Program.OutputsWritten;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 289bb87ae5..ef81be48f7 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -29,6 +29,7 @@
#include <stdio.h>
+#include "radeon_compiler_util.h"
#include "../r300_reg.h"
/**
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 6f101c68eb..5da82d90f6 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -45,9 +45,6 @@
#include "radeon_program_pair.h"
-#define MAX_BRANCH_DEPTH_FULL 32
-#define MAX_BRANCH_DEPTH_PARTIAL 4
-
#define PROG_CODE \
struct r500_fragment_program_code *code = &c->code->code.r500
@@ -200,6 +197,9 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int
static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
{
+ if (!src.Used)
+ return 0;
+
if (src.File == RC_FILE_CONSTANT) {
return src.Index | 0x100;
} else if (src.File == RC_FILE_TEMPORARY) {
@@ -506,7 +506,7 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
break;
}
case RC_OPCODE_IF:
- if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
+ if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
rc_error(s->C, "Branch depth exceeds hardware limit");
return;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index cfb6df2cd7..b69e81698a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -34,6 +34,8 @@
#define R500_PFS_MAX_INST 512
#define R500_PFS_NUM_TEMP_REGS 128
#define R500_PFS_NUM_CONST_REGS 256
+#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
+#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 4286baed0c..65548604bc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -29,6 +29,7 @@
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_pair.h"
+#include "radeon_compiler_util.h"
void rc_init(struct radeon_compiler * c)
@@ -356,66 +357,92 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
- unsigned int * max_reg = userdata;
+ int *max_reg = userdata;
if (file == RC_FILE_TEMPORARY)
- index > *max_reg ? *max_reg = index : 0;
+ (int)index > *max_reg ? *max_reg = index : 0;
}
-static void print_stats(struct radeon_compiler * c)
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
{
+ int max_reg = -1;
struct rc_instruction * tmp;
- unsigned max_reg, insts, fc, tex, alpha, rgb, presub;
- max_reg = insts = fc = tex = alpha = rgb = presub = 0;
+ memset(s, 0, sizeof(*s));
+
for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
tmp = tmp->Next){
const struct rc_opcode_info * info;
rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
- presub++;
+ s->num_presub_ops++;
info = rc_get_opcode_info(tmp->U.I.Opcode);
} else {
if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
- presub++;
+ s->num_presub_ops++;
if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
- presub++;
+ s->num_presub_ops++;
/* Assuming alpha will never be a flow control or
* a tex instruction. */
if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
- alpha++;
+ s->num_alpha_insts++;
if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
- rgb++;
+ s->num_rgb_insts++;
info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
}
if (info->IsFlowControl)
- fc++;
+ s->num_fc_insts++;
if (info->HasTexture)
- tex++;
- insts++;
+ s->num_tex_insts++;
+ s->num_insts++;
}
- if (insts < 4)
- return;
- fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
- "~%4u Instructions\n"
- "~%4u Vector Instructions (RGB)\n"
- "~%4u Scalar Instructions (Alpha)\n"
- "~%4u Flow Control Instructions\n"
- "~%4u Texture Instructions\n"
- "~%4u Presub Operations\n"
- "~%4u Temporary Registers\n"
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
- insts, rgb, alpha, fc, tex, presub, max_reg + 1);
+ s->num_temp_regs = max_reg + 1;
}
-/* Executes a list of compiler passes given in the parameter 'list'. */
-void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list,
- const char *shader_name)
+static void print_stats(struct radeon_compiler * c)
{
- if (c->Debug & RC_DBG_LOG) {
- fprintf(stderr, "%s: before compilation\n", shader_name);
- rc_print_program(&c->Program);
+ struct rc_program_stats s;
+
+ rc_get_stats(c, &s);
+
+ if (s.num_insts < 4)
+ return;
+
+ switch (c->type) {
+ case RC_VERTEX_PROGRAM:
+ fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
+ "~%4u Instructions\n"
+ "~%4u Flow Control Instructions\n"
+ "~%4u Temporary Registers\n"
+ "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+ s.num_insts, s.num_fc_insts, s.num_temp_regs);
+ break;
+
+ case RC_FRAGMENT_PROGRAM:
+ fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
+ "~%4u Instructions\n"
+ "~%4u Vector Instructions (RGB)\n"
+ "~%4u Scalar Instructions (Alpha)\n"
+ "~%4u Flow Control Instructions\n"
+ "~%4u Texture Instructions\n"
+ "~%4u Presub Operations\n"
+ "~%4u Temporary Registers\n"
+ "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+ s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
+ s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
+ s.num_temp_regs);
+ break;
+ default:
+ assert(0);
}
+}
+static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
+ "Vertex Program",
+ "Fragment Program"
+};
+
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
for (unsigned i = 0; list[i].name; i++) {
if (list[i].predicate) {
list[i].run(c, list[i].user);
@@ -424,11 +451,23 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis
return;
if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
- fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name);
+ fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
rc_print_program(&c->Program);
}
}
}
+}
+
+/* Executes a list of compiler passes given in the parameter 'list'. */
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
+ if (c->Debug & RC_DBG_LOG) {
+ fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
+ rc_print_program(&c->Program);
+ }
+
+ rc_run_compiler_passes(c, list);
+
if (c->Debug & RC_DBG_STATS)
print_stats(c);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 31fd469a04..e663339589 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -35,9 +35,16 @@
struct rc_swizzle_caps;
+enum rc_program_type {
+ RC_VERTEX_PROGRAM,
+ RC_FRAGMENT_PROGRAM,
+ RC_NUM_PROGRAM_TYPES
+};
+
struct radeon_compiler {
struct memory_pool Pool;
struct rc_program Program;
+ enum rc_program_type type;
unsigned Debug:2;
unsigned Error:1;
char * ErrorMsg;
@@ -140,9 +147,21 @@ struct radeon_compiler_pass {
void *user; /* Optional parameter which is passed to the run function. */
};
+struct rc_program_stats {
+ unsigned num_insts;
+ unsigned num_fc_insts;
+ unsigned num_tex_insts;
+ unsigned num_rgb_insts;
+ unsigned num_alpha_insts;
+ unsigned num_presub_ops;
+ unsigned num_temp_regs;
+};
+
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
+
/* Executes a list of compiler passes given in the parameter 'list'. */
-void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list,
- const char *shader_name);
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list);
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list);
void rc_validate_final_shader(struct radeon_compiler *c, void *user);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
index 97f4c75849..bf393a9fb1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
@@ -31,6 +31,8 @@
#include "radeon_compiler_util.h"
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
/**
*/
unsigned int rc_swizzle_to_writemask(unsigned int swz)
@@ -46,6 +48,91 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz)
return mask;
}
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
+{
+ if (idx & 0x4)
+ return idx;
+ return GET_SWZ(swz, idx);
+}
+
+unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, swz_x);
+ ret |= get_swz(src, swz_y) << 3;
+ ret |= get_swz(src, swz_z) << 6;
+ ret |= get_swz(src, swz_w) << 9;
+
+ return ret;
+}
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
+
+ return ret;
+}
+
+/**
+ * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
+ */
+rc_swizzle rc_mask_to_swizzle(unsigned int mask)
+{
+ switch (mask) {
+ case RC_MASK_X: return RC_SWIZZLE_X;
+ case RC_MASK_Y: return RC_SWIZZLE_Y;
+ case RC_MASK_Z: return RC_SWIZZLE_Z;
+ case RC_MASK_W: return RC_SWIZZLE_W;
+ }
+ return RC_SWIZZLE_UNUSED;
+}
+
+/* Reorder mask bits according to swizzle. */
+unsigned swizzle_mask(unsigned swizzle, unsigned mask)
+{
+ unsigned ret = 0;
+ for (unsigned chan = 0; chan < 4; ++chan) {
+ unsigned swz = GET_SWZ(swizzle, chan);
+ if (swz < 4)
+ ret |= GET_BIT(mask, swz) << chan;
+ }
+ return ret;
+}
+
+/**
+ * Left multiplication of a register with a swizzle
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+ struct rc_src_register tmp = srcreg;
+ int i;
+ tmp.Swizzle = 0;
+ tmp.Negate = 0;
+ for(i = 0; i < 4; ++i) {
+ rc_swizzle swz = GET_SWZ(swizzle, i);
+ if (swz < 4) {
+ tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+ tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
+ } else {
+ tmp.Swizzle |= swz << (i*3);
+ }
+ }
+ return tmp;
+}
+
+void reset_srcreg(struct rc_src_register* reg)
+{
+ memset(reg, 0, sizeof(struct rc_src_register));
+ reg->Swizzle = RC_SWIZZLE_XYZW;
+}
+
unsigned int rc_src_reads_dst_mask(
rc_register_file src_file,
unsigned int src_idx,
@@ -59,3 +146,123 @@ unsigned int rc_src_reads_dst_mask(
}
return dst_mask & rc_swizzle_to_writemask(src_swz);
}
+
+unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels)
+{
+ unsigned int chan;
+ unsigned int swz = RC_SWIZZLE_UNUSED;
+ unsigned int ret = RC_SOURCE_NONE;
+
+ for(chan = 0; chan < channels; chan++) {
+ swz = GET_SWZ(swizzle, chan);
+ if (swz == RC_SWIZZLE_W) {
+ ret |= RC_SOURCE_ALPHA;
+ } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+ || swz == RC_SWIZZLE_Z) {
+ ret |= RC_SOURCE_RGB;
+ }
+ }
+ return ret;
+}
+
+unsigned int rc_source_type_mask(unsigned int mask)
+{
+ unsigned int ret = RC_SOURCE_NONE;
+
+ if (mask & RC_MASK_XYZ)
+ ret |= RC_SOURCE_RGB;
+
+ if (mask & RC_MASK_W)
+ ret |= RC_SOURCE_ALPHA;
+
+ return ret;
+}
+
+struct can_use_presub_data {
+ struct rc_src_register RemoveSrcs[3];
+ unsigned int RGBCount;
+ unsigned int AlphaCount;
+};
+
+static void can_use_presub_read_cb(
+ void * userdata,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
+{
+ struct can_use_presub_data * d = userdata;
+ unsigned int src_type = rc_source_type_mask(mask);
+ unsigned int i;
+
+ if (file == RC_FILE_NONE)
+ return;
+
+ for(i = 0; i < 3; i++) {
+ if (d->RemoveSrcs[i].File == file
+ && d->RemoveSrcs[i].Index == index) {
+ src_type &=
+ ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4);
+ }
+ }
+
+ if (src_type & RC_SOURCE_RGB)
+ d->RGBCount++;
+
+ if (src_type & RC_SOURCE_ALPHA)
+ d->AlphaCount++;
+}
+
+unsigned int rc_inst_can_use_presub(
+ struct rc_instruction * inst,
+ rc_presubtract_op presub_op,
+ unsigned int presub_writemask,
+ struct rc_src_register replace_reg,
+ struct rc_src_register presub_src0,
+ struct rc_src_register presub_src1)
+{
+ struct can_use_presub_data d;
+ unsigned int num_presub_srcs;
+ unsigned int presub_src_type = rc_source_type_mask(presub_writemask);
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (presub_op == RC_PRESUB_NONE) {
+ return 1;
+ }
+
+ if (info->HasTexture) {
+ return 0;
+ }
+
+ /* We can't use more than one presubtract value in an
+ * instruction, unless the two prsubtract operations
+ * are the same and read from the same registers.
+ * XXX For now we will limit instructions to only one presubtract
+ * value.*/
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+ return 0;
+ }
+
+ memset(&d, 0, sizeof(d));
+ d.RemoveSrcs[0] = replace_reg;
+ d.RemoveSrcs[1] = presub_src0;
+ d.RemoveSrcs[2] = presub_src1;
+
+ rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d);
+
+ num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
+
+ if ((presub_src_type & RC_SOURCE_RGB)
+ && d.RGBCount + num_presub_srcs > 3) {
+ return 0;
+ }
+
+ if ((presub_src_type & RC_SOURCE_ALPHA)
+ && d.AlphaCount + num_presub_srcs > 3) {
+ return 0;
+ }
+
+ return 1;
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
index 1a14e7cb0e..461ab9ffb1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
@@ -3,8 +3,27 @@
#ifndef RADEON_PROGRAM_UTIL_H
#define RADEON_PROGRAM_UTIL_H
+struct rc_instruction;
+struct rc_src_register;
+
unsigned int rc_swizzle_to_writemask(unsigned int swz);
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);
+
+unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y,
+ rc_swizzle swz_z, rc_swizzle swz_w);
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+
+unsigned swizzle_mask(unsigned swizzle, unsigned mask);
+
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+void reset_srcreg(struct rc_src_register* reg);
+
unsigned int rc_src_reads_dst_mask(
rc_register_file src_file,
unsigned int src_idx,
@@ -13,4 +32,16 @@ unsigned int rc_src_reads_dst_mask(
unsigned int dst_idx,
unsigned int dst_mask);
+unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels);
+
+unsigned int rc_source_type_mask(unsigned int mask);
+
+unsigned int rc_inst_can_use_presub(
+ struct rc_instruction * inst,
+ rc_presubtract_op presub_op,
+ unsigned int presub_writemask,
+ struct rc_src_register replace_reg,
+ struct rc_src_register presub_src0,
+ struct rc_src_register presub_src1);
+
#endif /* RADEON_PROGRAM_UTIL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index fd94194dc3..d0a64d936e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -139,7 +139,46 @@ static void pair_sub_for_all_args(
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
for(i = 0; i < info->NumSrcRegs; i++) {
- cb(userdata, fullinst, &sub->Arg[i]);
+ unsigned int src_type;
+ unsigned int channels = 0;
+ if (&fullinst->U.P.RGB == sub)
+ channels = 3;
+ else if (&fullinst->U.P.Alpha == sub)
+ channels = 1;
+
+ assert(channels > 0);
+ src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels);
+
+ if (src_type == RC_SOURCE_NONE)
+ continue;
+
+ if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) {
+ unsigned int presub_type;
+ unsigned int presub_src_count;
+ struct rc_pair_instruction_source * src_array;
+ unsigned int j;
+ if (src_type & RC_SOURCE_RGB) {
+ presub_type = fullinst->
+ U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
+ src_array = fullinst->U.P.RGB.Src;
+ } else {
+ presub_type = fullinst->
+ U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index;
+ src_array = fullinst->U.P.Alpha.Src;
+ }
+ presub_src_count
+ = rc_presubtract_src_reg_count(presub_type);
+ for(j = 0; j < presub_src_count; j++) {
+ cb(userdata, fullinst, &sub->Arg[i],
+ &src_array[j]);
+ }
+ } else {
+ struct rc_pair_instruction_source * src =
+ rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]);
+ if (src) {
+ cb(userdata, fullinst, &sub->Arg[i], src);
+ }
+ }
}
}
@@ -308,6 +347,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+ unsigned int remapped_presub = 0;
if (opcode->HasDstReg) {
rc_register_file file = inst->DstReg.File;
@@ -327,6 +367,12 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
unsigned int i;
unsigned int srcp_srcs = rc_presubtract_src_reg_count(
inst->PreSub.Opcode);
+ /* Make sure we only remap presubtract sources once in
+ * case more than one source register reads the
+ * presubtract result. */
+ if (remapped_presub)
+ continue;
+
for(i = 0; i < srcp_srcs; i++) {
file = inst->PreSub.SrcReg[i].File;
index = inst->PreSub.SrcReg[i].Index;
@@ -334,7 +380,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
inst->PreSub.SrcReg[i].File = file;
inst->PreSub.SrcReg[i].Index = index;
}
-
+ remapped_presub = 1;
}
else {
cb(userdata, fullinst, &file, &index);
@@ -430,12 +476,29 @@ static rc_opcode get_flow_control_inst(struct rc_instruction * inst)
}
+struct branch_write_mask {
+ unsigned int IfWriteMask:4;
+ unsigned int ElseWriteMask:4;
+ unsigned int HasElse:1;
+};
+
+union get_readers_read_cb {
+ rc_read_src_fn I;
+ rc_pair_read_arg_fn P;
+};
+
struct get_readers_callback_data {
struct radeon_compiler * C;
struct rc_reader_data * ReaderData;
- rc_read_src_fn ReadCB;
+ rc_read_src_fn ReadNormalCB;
+ rc_pair_read_arg_fn ReadPairCB;
rc_read_write_mask_fn WriteCB;
+ rc_register_file DstFile;
+ unsigned int DstIndex;
+ unsigned int DstMask;
unsigned int AliveWriteMask;
+ /* For convenience, this is indexed starting at 1 */
+ struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1];
};
static void add_reader(
@@ -443,7 +506,7 @@ static void add_reader(
struct rc_reader_data * data,
struct rc_instruction * inst,
unsigned int mask,
- struct rc_src_register * src)
+ void * arg_or_src)
{
struct rc_reader * new;
memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
@@ -451,7 +514,74 @@ static void add_reader(
new = &data->Readers[data->ReaderCount++];
new->Inst = inst;
new->WriteMask = mask;
- new->Src = src;
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ new->U.Src = arg_or_src;
+ } else {
+ new->U.Arg = arg_or_src;
+ }
+}
+
+static unsigned int get_readers_read_callback(
+ struct get_readers_callback_data * cb_data,
+ unsigned int has_rel_addr,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int swizzle)
+{
+ unsigned int shared_mask, read_mask;
+
+ if (has_rel_addr) {
+ cb_data->ReaderData->Abort = 1;
+ return RC_MASK_NONE;
+ }
+
+ shared_mask = rc_src_reads_dst_mask(file, index, swizzle,
+ cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask);
+
+ if (shared_mask == RC_MASK_NONE)
+ return shared_mask;
+
+ /* If we make it this far, it means that this source reads from the
+ * same register written to by d->ReaderData->Writer. */
+
+ read_mask = rc_swizzle_to_writemask(swizzle);
+ if (cb_data->ReaderData->AbortOnRead & read_mask) {
+ cb_data->ReaderData->Abort = 1;
+ return shared_mask;
+ }
+
+ /* XXX The behavior in this case should be configurable. */
+ if ((read_mask & cb_data->AliveWriteMask) != read_mask) {
+ cb_data->ReaderData->Abort = 1;
+ return shared_mask;
+ }
+
+ return shared_mask;
+}
+
+static void get_readers_pair_read_callback(
+ void * userdata,
+ struct rc_instruction * inst,
+ struct rc_pair_instruction_arg * arg,
+ struct rc_pair_instruction_source * src)
+{
+ unsigned int shared_mask;
+ struct get_readers_callback_data * d = userdata;
+
+ shared_mask = get_readers_read_callback(d,
+ 0 /*Pair Instructions don't use RelAddr*/,
+ src->File, src->Index, arg->Swizzle);
+
+ if (shared_mask == RC_MASK_NONE)
+ return;
+
+ if (d->ReadPairCB)
+ d->ReadPairCB(d->ReaderData, inst, arg, src);
+
+ if (d->ReaderData->Abort)
+ return;
+
+ add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, arg);
}
/**
@@ -464,37 +594,18 @@ static void get_readers_normal_read_callback(
struct rc_src_register * src)
{
struct get_readers_callback_data * d = userdata;
- unsigned int read_mask;
unsigned int shared_mask;
- if (src->RelAddr)
- d->ReaderData->Abort = 1;
-
- shared_mask = rc_src_reads_dst_mask(src->File, src->Index,
- src->Swizzle,
- d->ReaderData->Writer->U.I.DstReg.File,
- d->ReaderData->Writer->U.I.DstReg.Index,
- d->AliveWriteMask);
+ shared_mask = get_readers_read_callback(d,
+ src->RelAddr, src->File, src->Index, src->Swizzle);
if (shared_mask == RC_MASK_NONE)
return;
+ /* The callback function could potentially clear d->ReaderData->Abort,
+ * so we need to call it before we return. */
+ if (d->ReadNormalCB)
+ d->ReadNormalCB(d->ReaderData, inst, src);
- /* If we make it this far, it means that this source reads from the
- * same register written to by d->ReaderData->Writer. */
-
- if (d->ReaderData->AbortOnRead) {
- d->ReaderData->Abort = 1;
- return;
- }
-
- read_mask = rc_swizzle_to_writemask(src->Swizzle);
- /* XXX The behavior in this case should be configurable. */
- if ((read_mask & d->AliveWriteMask) != read_mask) {
- d->ReaderData->Abort = 1;
- return;
- }
-
- d->ReadCB(d->ReaderData, inst, src);
if (d->ReaderData->Abort)
return;
@@ -515,29 +626,132 @@ static void get_readers_write_callback(
{
struct get_readers_callback_data * d = userdata;
- if (index == d->ReaderData->Writer->U.I.DstReg.Index
- && file == d->ReaderData->Writer->U.I.DstReg.File) {
- unsigned int shared_mask = mask
- & d->ReaderData->Writer->U.I.DstReg.WriteMask;
- if (d->ReaderData->InElse) {
- if (shared_mask & d->AliveWriteMask) {
- /* We set AbortOnRead here because the
- * destination register of d->ReaderData->Writer
- * is written to in both the IF and the
- * ELSE block of this IF/ELSE statement.
- * This means that readers of this
- * destination register that follow this IF/ELSE
- * statement use the value of different
- * instructions depending on the control flow
- * decisions made by the program. */
- d->ReaderData->AbortOnRead = 1;
+ if (index == d->DstIndex && file == d->DstFile) {
+ unsigned int shared_mask = mask & d->DstMask;
+ d->ReaderData->AbortOnRead &= ~shared_mask;
+ d->AliveWriteMask &= ~shared_mask;
+ }
+
+ if(d->WriteCB)
+ d->WriteCB(d->ReaderData, inst, file, index, mask);
+}
+
+static void get_readers_for_single_write(
+ void * userdata,
+ struct rc_instruction * writer,
+ rc_register_file dst_file,
+ unsigned int dst_index,
+ unsigned int dst_mask)
+{
+ struct rc_instruction * tmp;
+ unsigned int branch_depth = 0;
+ struct get_readers_callback_data * d = userdata;
+
+ d->ReaderData->Writer = writer;
+ d->ReaderData->AbortOnRead = 0;
+ d->ReaderData->InElse = 0;
+ d->DstFile = dst_file;
+ d->DstIndex = dst_index;
+ d->DstMask = dst_mask;
+ d->AliveWriteMask = dst_mask;
+ memset(d->BranchMasks, 0, sizeof(d->BranchMasks));
+
+ if (!dst_mask)
+ return;
+
+ for(tmp = writer->Next; tmp != &d->C->Program.Instructions;
+ tmp = tmp->Next){
+ rc_opcode opcode = get_flow_control_inst(tmp);
+ switch(opcode) {
+ case RC_OPCODE_BGNLOOP:
+ /* XXX We can do better when we see a BGNLOOP if we
+ * add a flag called AbortOnWrite to struct
+ * rc_reader_data and leave it set until the next
+ * ENDLOOP. */
+ case RC_OPCODE_ENDLOOP:
+ /* XXX We can do better when we see an ENDLOOP by
+ * searching backwards from writer and looking for
+ * readers of writer's destination index. If we find a
+ * reader before we get to the BGNLOOP, we must abort
+ * unless there is another writer between that reader
+ * and the BGNLOOP. */
+ case RC_OPCODE_BRK:
+ case RC_OPCODE_CONT:
+ d->ReaderData->Abort = 1;
+ return;
+ case RC_OPCODE_IF:
+ branch_depth++;
+ if (branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+ d->ReaderData->Abort = 1;
+ return;
+ }
+ d->BranchMasks[branch_depth].IfWriteMask =
+ d->AliveWriteMask;
+ break;
+ case RC_OPCODE_ELSE:
+ if (branch_depth == 0) {
+ d->ReaderData->InElse = 1;
+ } else {
+ unsigned int temp_mask = d->AliveWriteMask;
+ d->AliveWriteMask =
+ d->BranchMasks[branch_depth].IfWriteMask;
+ d->BranchMasks[branch_depth].ElseWriteMask =
+ temp_mask;
+ d->BranchMasks[branch_depth].HasElse = 1;
}
+ break;
+ case RC_OPCODE_ENDIF:
+ if (branch_depth == 0) {
+ d->ReaderData->AbortOnRead = d->AliveWriteMask;
+ d->ReaderData->InElse = 0;
+ }
+ else {
+ struct branch_write_mask * masks =
+ &d->BranchMasks[branch_depth];
+
+ if (masks->HasElse) {
+ d->ReaderData->AbortOnRead |=
+ masks->IfWriteMask
+ & ~masks->ElseWriteMask;
+ d->AliveWriteMask = masks->IfWriteMask
+ ^ ((masks->IfWriteMask ^
+ masks->ElseWriteMask)
+ & (masks->IfWriteMask
+ ^ d->AliveWriteMask));
+ } else {
+ d->ReaderData->AbortOnRead |=
+ masks->IfWriteMask
+ & ~d->AliveWriteMask;
+ d->AliveWriteMask = masks->IfWriteMask;
+
+ }
+ memset(masks, 0,
+ sizeof(struct branch_write_mask));
+ branch_depth--;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (d->ReaderData->InElse)
+ continue;
+
+ if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+ rc_for_all_reads_src(tmp,
+ get_readers_normal_read_callback, d);
} else {
- d->AliveWriteMask &= ~shared_mask;
+ rc_pair_for_all_reads_arg(tmp,
+ get_readers_pair_read_callback, d);
}
- }
+ rc_for_all_writes_mask(tmp, get_readers_write_callback, d);
- d->WriteCB(d->ReaderData, inst, file, index, mask);
+ if (d->ReaderData->Abort)
+ return;
+
+ if (branch_depth == 0 && !d->AliveWriteMask)
+ return;
+ }
}
/**
@@ -578,83 +792,26 @@ static void get_readers_write_callback(
* @param write_cb This function will be called for every instruction after
* writer.
*/
-void rc_get_readers_normal(
+void rc_get_readers(
struct radeon_compiler * c,
struct rc_instruction * writer,
struct rc_reader_data * data,
- rc_read_src_fn read_cb,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb)
{
- struct rc_instruction * tmp;
struct get_readers_callback_data d;
- unsigned int branch_depth = 0;
- data->Writer = writer;
data->Abort = 0;
- data->AbortOnRead = 0;
- data->InElse = 0;
data->ReaderCount = 0;
data->ReadersReserved = 0;
data->Readers = NULL;
d.C = c;
- d.AliveWriteMask = writer->U.I.DstReg.WriteMask;
d.ReaderData = data;
- d.ReadCB = read_cb;
+ d.ReadNormalCB = read_normal_cb;
+ d.ReadPairCB = read_pair_cb;
d.WriteCB = write_cb;
- if (!writer->U.I.DstReg.WriteMask)
- return;
-
- for(tmp = writer->Next; tmp != &c->Program.Instructions;
- tmp = tmp->Next){
- rc_opcode opcode = get_flow_control_inst(tmp);
- switch(opcode) {
- case RC_OPCODE_BGNLOOP:
- /* XXX We can do better when we see a BGNLOOP if we
- * add a flag called AbortOnWrite to struct
- * rc_reader_data and leave it set until the next
- * ENDLOOP. */
- case RC_OPCODE_ENDLOOP:
- /* XXX We can do better when we see an ENDLOOP by
- * searching backwards from writer and looking for
- * readers of writer's destination index. If we find a
- * reader before we get to the BGNLOOP, we must abort
- * unless there is another writer between that reader
- * and the BGNLOOP. */
- data->Abort = 1;
- return;
- case RC_OPCODE_IF:
- /* XXX We can do better here, but this will have to
- * do until this dataflow analysis is more mature. */
- data->Abort = 1;
- branch_depth++;
- break;
- case RC_OPCODE_ELSE:
- if (branch_depth == 0)
- data->InElse = 1;
- break;
- case RC_OPCODE_ENDIF:
- if (branch_depth == 0) {
- data->AbortOnRead = 1;
- data->InElse = 0;
- }
- else {
- branch_depth--;
- }
- break;
- default:
- break;
- }
-
- if (!data->InElse)
- rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d);
- rc_for_all_writes_mask(tmp, get_readers_write_callback, &d);
-
- if (data->Abort)
- return;
-
- if (!d.AliveWriteMask)
- return;
- }
+ rc_for_all_writes_mask(writer, get_readers_for_single_write, &d);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index 7de6b98f76..ef971c5b23 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -36,6 +36,7 @@ struct rc_instruction;
struct rc_swizzle_caps;
struct rc_src_register;
struct rc_pair_instruction_arg;
+struct rc_pair_instruction_source;
struct rc_compiler;
@@ -59,7 +60,8 @@ void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
void * userdata);
typedef void (*rc_pair_read_arg_fn)(void * userdata,
- struct rc_instruction * inst, struct rc_pair_instruction_arg * arg);
+ struct rc_instruction * inst, struct rc_pair_instruction_arg * arg,
+ struct rc_pair_instruction_source * src);
void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
rc_pair_read_arg_fn cb, void * userdata);
@@ -71,7 +73,10 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v
struct rc_reader {
struct rc_instruction * Inst;
unsigned int WriteMask;
- struct rc_src_register * Src;
+ union {
+ struct rc_src_register * Src;
+ struct rc_pair_instruction_arg * Arg;
+ } U;
};
struct rc_reader_data {
@@ -87,14 +92,13 @@ struct rc_reader_data {
void * CbData;
};
-void rc_get_readers_normal(
+void rc_get_readers(
struct radeon_compiler * c,
- struct rc_instruction * inst,
+ struct rc_instruction * writer,
struct rc_reader_data * data,
- /*XXX: These should be their own function types. */
- rc_read_src_fn read_cb,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb);
-
/**
* Compiler passes based on dataflow analysis.
*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
index a0f7bd8174..133a9f72ec 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
@@ -56,6 +56,7 @@ static void rewrite_source(struct radeon_compiler * c,
mov->U.I.DstReg.Index = tempreg;
mov->U.I.DstReg.WriteMask = split.Phase[phase];
mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+ mov->U.I.PreSub = inst->U.I.PreSub;
phase_refmask = 0;
for(unsigned int chan = 0; chan < 4; ++chan) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index da495a3afa..25afd272be 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -67,6 +67,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_CLAMP,
+ .Name = "CLAMP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_CMP,
.Name = "CMP",
.NumSrcRegs = 3,
@@ -453,6 +460,7 @@ void rc_compute_sources_for_writemask(
srcmasks[1] |= RC_MASK_XY;
break;
case RC_OPCODE_DP3:
+ case RC_OPCODE_XPD:
srcmasks[0] |= RC_MASK_XYZ;
srcmasks[1] |= RC_MASK_XYZ;
break;
@@ -460,6 +468,10 @@ void rc_compute_sources_for_writemask(
srcmasks[0] |= RC_MASK_XYZW;
srcmasks[1] |= RC_MASK_XYZW;
break;
+ case RC_OPCODE_DPH:
+ srcmasks[0] |= RC_MASK_XYZ;
+ srcmasks[1] |= RC_MASK_XYZW;
+ break;
case RC_OPCODE_TXB:
case RC_OPCODE_TXP:
srcmasks[0] |= RC_MASK_W;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index d3f639c870..7e66610127 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -50,6 +50,9 @@ typedef enum {
/** vec4 instruction: dst.c = ceil(src0.c) */
RC_OPCODE_CEIL,
+ /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */
+ RC_OPCODE_CLAMP,
+
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
RC_OPCODE_CMP,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 15b9c5e7dc..44f4c0fbdc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -54,12 +54,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
combine.Negate = outer.Negate;
} else {
combine.Abs = inner.Abs;
- combine.Negate = 0;
- for(unsigned int chan = 0; chan < 4; ++chan) {
- unsigned int swz = GET_SWZ(outer.Swizzle, chan);
- if (swz < 4)
- combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
- }
+ combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
combine.Negate ^= outer.Negate;
}
combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
@@ -71,12 +66,13 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
{
rc_register_file file = src->File;
struct rc_reader_data * reader_data = data;
- const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
- /* It is possible to do copy propigation in this situation,
- * just not right now, see peephole_add_presub_inv() */
- if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
- (info->NumSrcRegs > 2 || info->HasTexture)) {
+ if(!rc_inst_can_use_presub(inst,
+ reader_data->Writer->U.I.PreSub.Opcode,
+ rc_swizzle_to_writemask(src->Swizzle),
+ *src,
+ reader_data->Writer->U.I.PreSub.SrcReg[0],
+ reader_data->Writer->U.I.PreSub.SrcReg[1])) {
reader_data->Abort = 1;
return;
}
@@ -112,11 +108,11 @@ static void src_clobbered_reads_cb(
&& src->Index == sc_data->Index
&& (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
- sc_data->ReaderData->AbortOnRead = 1;
+ sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
}
if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
- sc_data->ReaderData->AbortOnRead = 1;
+ sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
}
}
@@ -149,8 +145,9 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
return;
/* Get a list of all the readers of this MOV instruction. */
- rc_get_readers_normal(c, inst_mov, &reader_data,
- copy_propagate_scan_read, is_src_clobbered_scan_write);
+ rc_get_readers(c, inst_mov, &reader_data,
+ copy_propagate_scan_read, NULL,
+ is_src_clobbered_scan_write);
if (reader_data.Abort || reader_data.ReaderCount == 0)
return;
@@ -158,7 +155,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
/* Propagate the MOV instruction. */
for (i = 0; i < reader_data.ReaderCount; i++) {
struct rc_instruction * inst = reader_data.Readers[i].Inst;
- *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, inst_mov->U.I.SrcReg[0]);
+ *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]);
if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
inst->U.I.PreSub = inst_mov->U.I.PreSub;
@@ -423,24 +420,13 @@ static void presub_scan_read(
struct rc_src_register * src)
{
struct rc_reader_data * reader_data = data;
- const struct rc_opcode_info * info =
- rc_get_opcode_info(inst->U.I.Opcode);
- /* XXX: There are some situations where instructions
- * with more than 2 src registers can use the
- * presubtract select, but to keep things simple we
- * will disable presubtract on these instructions for
- * now. */
- if (info->NumSrcRegs > 2 || info->HasTexture) {
- reader_data->Abort = 1;
- return;
- }
+ rc_presubtract_op * presub_opcode = reader_data->CbData;
- /* We can't use more than one presubtract value in an
- * instruction, unless the two prsubtract operations
- * are the same and read from the same registers.
- * XXX For now we will limit instructions to only one presubtract
- * value.*/
- if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+ if (!rc_inst_can_use_presub(inst, *presub_opcode,
+ reader_data->Writer->U.I.DstReg.WriteMask,
+ *src,
+ reader_data->Writer->U.I.SrcReg[0],
+ reader_data->Writer->U.I.SrcReg[1])) {
reader_data->Abort = 1;
return;
}
@@ -454,8 +440,10 @@ static int presub_helper(
{
struct rc_reader_data reader_data;
unsigned int i;
+ rc_presubtract_op cb_op = presub_opcode;
- rc_get_readers_normal(c, inst_add, &reader_data, presub_scan_read,
+ reader_data.CbData = &cb_op;
+ rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
is_src_clobbered_scan_write);
if (reader_data.Abort || reader_data.ReaderCount == 0)
@@ -468,7 +456,7 @@ static int presub_helper(
rc_get_opcode_info(reader.Inst->U.I.Opcode);
for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
- if (&reader.Inst->U.I.SrcReg[src_index] == reader.Src)
+ if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src)
presub_replace(inst_add, reader.Inst, src_index);
}
}
@@ -505,7 +493,9 @@ static void presub_replace_add(
inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
}
-static int is_presub_candidate(struct rc_instruction * inst)
+static int is_presub_candidate(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst)
{
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
unsigned int i;
@@ -514,7 +504,12 @@ static int is_presub_candidate(struct rc_instruction * inst)
return 0;
for(i = 0; i < info->NumSrcRegs; i++) {
- if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg))
+ struct rc_src_register src = inst->U.I.SrcReg[i];
+ if (src_reads_dst_mask(src, inst->U.I.DstReg))
+ return 0;
+
+ src.File = RC_FILE_PRESUB;
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
return 0;
}
return 1;
@@ -528,7 +523,7 @@ static int peephole_add_presub_add(
struct rc_src_register * src1 = NULL;
unsigned int i;
- if (!is_presub_candidate(inst_add))
+ if (!is_presub_candidate(c, inst_add))
return 0;
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
@@ -592,7 +587,7 @@ static int peephole_add_presub_inv(
{
unsigned int i, swz, mask;
- if (!is_presub_candidate(inst_add))
+ if (!is_presub_candidate(c, inst_add))
return 0;
mask = inst_add->U.I.DstReg.WriteMask;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index 91524f5ec6..d53181e1f7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -66,10 +66,13 @@ struct regalloc_state {
struct hardware_register * HwTemporary;
unsigned int NumHwTemporaries;
/**
- * If an instruction is inside of a loop, end_loop will be the
- * IP of the ENDLOOP instruction, otherwise end_loop will be 0
+ * If an instruction is inside of a loop, EndLoop will be the
+ * IP of the ENDLOOP instruction, and BeginLoop will be the IP
+ * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop
+ * will be -1.
*/
- int end_loop;
+ int EndLoop;
+ int BeginLoop;
};
static void print_live_intervals(struct live_intervals * src)
@@ -180,11 +183,13 @@ static void scan_callback(void * data, struct rc_instruction * inst,
reg->Used = 1;
if (file == RC_FILE_INPUT)
reg->Live.Start = -1;
+ else if (s->BeginLoop >= 0)
+ reg->Live.Start = s->BeginLoop;
else
reg->Live.Start = inst->IP;
reg->Live.End = inst->IP;
- } else if (s->end_loop)
- reg->Live.End = s->end_loop;
+ } else if (s->EndLoop >= 0)
+ reg->Live.End = s->EndLoop;
else if (inst->IP > reg->Live.End)
reg->Live.End = inst->IP;
}
@@ -195,6 +200,8 @@ static void compute_live_intervals(struct radeon_compiler *c,
memset(s, 0, sizeof(*s));
s->C = c;
s->NumHwTemporaries = c->max_temp_regs;
+ s->BeginLoop = -1;
+ s->EndLoop = -1;
s->HwTemporary =
memory_pool_malloc(&c->Pool,
s->NumHwTemporaries * sizeof(struct hardware_register));
@@ -207,10 +214,12 @@ static void compute_live_intervals(struct radeon_compiler *c,
inst = inst->Next) {
/* For all instructions inside of a loop, the ENDLOOP
- * instruction is used as the end of the live interval. */
- if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
+ * instruction is used as the end of the live interval and
+ * the BGNLOOP instruction is used as the beginning. */
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) {
int loops = 1;
struct rc_instruction * tmp;
+ s->BeginLoop = inst->IP;
for(tmp = inst->Next;
tmp != &s->C->Program.Instructions;
tmp = tmp->Next) {
@@ -219,15 +228,17 @@ static void compute_live_intervals(struct radeon_compiler *c,
} else if (tmp->U.I.Opcode
== RC_OPCODE_ENDLOOP) {
if(!--loops) {
- s->end_loop = tmp->IP;
+ s->EndLoop = tmp->IP;
break;
}
}
}
}
- if (inst->IP == s->end_loop)
- s->end_loop = 0;
+ if (inst->IP == s->EndLoop) {
+ s->EndLoop = -1;
+ s->BeginLoop = -1;
+ }
rc_for_all_reads_mask(inst, scan_callback, s);
rc_for_all_writes_mask(inst, scan_callback, s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index 553e9dcf7c..9beb5d6357 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
@@ -54,6 +55,11 @@ struct schedule_instruction {
* this instruction can be scheduled.
*/
unsigned int NumDependencies:5;
+
+ /** List of all readers (see rc_get_readers() for the definition of
+ * "all readers"), even those outside the basic block this instruction
+ * lives in. */
+ struct rc_reader_data GlobalReaders;
};
@@ -94,6 +100,16 @@ struct register_state {
struct reg_value * Values[4];
};
+struct remap_reg {
+ struct rc_instruciont * Inst;
+ unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int OldSwizzle:3;
+ unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int NewSwizzle:3;
+ unsigned int OnlyTexReads:1;
+ struct remap_reg * Next;
+};
+
struct schedule_state {
struct radeon_compiler * C;
struct schedule_instruction * Current;
@@ -126,15 +142,6 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s,
return &s->Temporary[index].Values[chan];
}
-static struct reg_value * get_reg_value(struct schedule_state * s,
- rc_register_file file, unsigned int index, unsigned int chan)
-{
- struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
- if (!pv)
- return 0;
- return *pv;
-}
-
static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
{
inst->NextReady = *list;
@@ -295,12 +302,12 @@ static int merge_presub_sources(
assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
switch(type) {
- case RC_PAIR_SOURCE_RGB:
+ case RC_SOURCE_RGB:
is_rgb = 1;
is_alpha = 0;
dst_sub = &dst_full->RGB;
break;
- case RC_PAIR_SOURCE_ALPHA:
+ case RC_SOURCE_ALPHA:
is_rgb = 0;
is_alpha = 1;
dst_sub = &dst_full->Alpha;
@@ -341,6 +348,8 @@ static int merge_presub_sources(
continue;
free_source = rc_pair_alloc_source(dst_full, is_rgb,
is_alpha, temp.File, temp.Index);
+ if (free_source < 0)
+ return 0;
one_way = 1;
} else {
dst_sub->Src[free_source] = temp;
@@ -356,11 +365,11 @@ static int merge_presub_sources(
for(arg = 0; arg < info->NumSrcRegs; arg++) {
/*If this arg does not read from an rgb source,
* do nothing. */
- if (!(rc_source_type_that_arg_reads(
- dst_full->RGB.Arg[arg].Source,
- dst_full->RGB.Arg[arg].Swizzle) & type)) {
+ if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle,
+ 3) & type)) {
continue;
}
+
if (dst_full->RGB.Arg[arg].Source == srcp_src)
dst_full->RGB.Arg[arg].Source = free_source;
/* We need to do this just in case register
@@ -392,13 +401,13 @@ static int destructive_merge_instructions(
/* Merge the rgb presubtract registers. */
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
- if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) {
+ if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
return 0;
}
}
/* Merge the alpha presubtract registers */
if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
- if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){
+ if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
return 0;
}
}
@@ -525,6 +534,222 @@ static void presub_nop(struct rc_instruction * emitted) {
}
}
}
+
+static void rgb_to_alpha_remap (
+ struct rc_instruction * inst,
+ struct rc_pair_instruction_arg * arg,
+ rc_register_file old_file,
+ rc_swizzle old_swz,
+ unsigned int new_index)
+{
+ int new_src_index;
+ unsigned int i;
+ struct rc_pair_instruction_source * old_src =
+ rc_pair_get_src(&inst->U.P, arg);
+ if (!old_src) {
+ return;
+ }
+
+ for (i = 0; i < 3; i++) {
+ if (get_swz(arg->Swizzle, i) == old_swz) {
+ SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
+ }
+ }
+ memset(old_src, 0, sizeof(struct rc_pair_instruction_source));
+ new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
+ old_file, new_index);
+ /* This conversion is not possible, we must have made a mistake in
+ * is_rgb_to_alpha_possible. */
+ if (new_src_index < 0) {
+ assert(0);
+ return;
+ }
+
+ arg->Source = new_src_index;
+}
+
+static int can_remap(unsigned int opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_DDX:
+ case RC_OPCODE_DDY:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static int can_convert_opcode_to_alpha(unsigned int opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_DDX:
+ case RC_OPCODE_DDY:
+ case RC_OPCODE_DP2:
+ case RC_OPCODE_DP3:
+ case RC_OPCODE_DP4:
+ case RC_OPCODE_DPH:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void is_rgb_to_alpha_possible(
+ void * userdata,
+ struct rc_instruction * inst,
+ struct rc_pair_instruction_arg * arg,
+ struct rc_pair_instruction_source * src)
+{
+ unsigned int chan_count = 0;
+ unsigned int alpha_sources = 0;
+ unsigned int i;
+ struct rc_reader_data * reader_data = userdata;
+
+ if (!can_remap(inst->U.P.RGB.Opcode)
+ || !can_remap(inst->U.P.Alpha.Opcode)) {
+ reader_data->Abort = 1;
+ return;
+ }
+
+ if (!src)
+ return;
+
+ /* XXX There are some cases where we can still do the conversion if
+ * a reader reads from a presubtract source, but for now we'll prevent
+ * it. */
+ if (arg->Source == RC_PAIR_PRESUB_SRC) {
+ reader_data->Abort = 1;
+ return;
+ }
+
+ /* Make sure the source only reads from one component.
+ * XXX We should allow the source to read from the same component twice.
+ * XXX If the index we will be converting to is the same as the
+ * current index, then it is OK to read from more than one component.
+ */
+ for (i = 0; i < 3; i++) {
+ rc_swizzle swz = get_swz(arg->Swizzle, i);
+ switch(swz) {
+ case RC_SWIZZLE_X:
+ case RC_SWIZZLE_Y:
+ case RC_SWIZZLE_Z:
+ case RC_SWIZZLE_W:
+ chan_count++;
+ break;
+ default:
+ break;
+ }
+ }
+ if (chan_count > 1) {
+ reader_data->Abort = 1;
+ return;
+ }
+
+ /* Make sure there are enough alpha sources.
+ * XXX If we know what register all the readers are going
+ * to be remapped to, then in some situations we can still do
+ * the subsitution, even if all 3 alpha sources are being used.*/
+ for (i = 0; i < 3; i++) {
+ if (inst->U.P.Alpha.Src[i].Used) {
+ alpha_sources++;
+ }
+ }
+ if (alpha_sources > 2) {
+ reader_data->Abort = 1;
+ return;
+ }
+}
+
+static int convert_rgb_to_alpha(
+ struct schedule_state * s,
+ struct schedule_instruction * sched_inst)
+{
+ struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
+ unsigned int old_mask = pair_inst->RGB.WriteMask;
+ unsigned int old_swz = rc_mask_to_swizzle(old_mask);
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(pair_inst->RGB.Opcode);
+ int new_index = -1;
+ unsigned int i;
+
+ if (sched_inst->GlobalReaders.Abort)
+ return 0;
+
+ if (!pair_inst->RGB.WriteMask)
+ return 0;
+
+ if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
+ || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
+ return 0;
+ }
+
+ assert(sched_inst->NumWriteValues == 1);
+
+ if (!sched_inst->WriteValues[0]) {
+ assert(0);
+ return 0;
+ }
+
+ /* We start at the old index, because if we can reuse the same
+ * register and just change the swizzle then it is more likely we
+ * will be able to convert all the readers. */
+ for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
+ struct reg_value ** new_regvalp = get_reg_valuep(
+ s, RC_FILE_TEMPORARY, i, 3);
+ if (!*new_regvalp) {
+ struct reg_value ** old_regvalp =
+ get_reg_valuep(s,
+ RC_FILE_TEMPORARY,
+ pair_inst->RGB.DestIndex,
+ rc_mask_to_swizzle(old_mask));
+ new_index = i;
+ *new_regvalp = *old_regvalp;
+ *old_regvalp = NULL;
+ new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
+ break;
+ }
+ }
+ if (new_index < 0) {
+ return 0;
+ }
+
+ pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
+ pair_inst->Alpha.DestIndex = new_index;
+ pair_inst->Alpha.WriteMask = 1;
+ pair_inst->Alpha.Target = pair_inst->RGB.Target;
+ pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
+ pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
+ pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
+ memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
+ sizeof(pair_inst->Alpha.Arg));
+ /* Move the swizzles into the first chan */
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ unsigned int j;
+ for (j = 0; j < 3; j++) {
+ unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
+ if (swz != RC_SWIZZLE_UNUSED) {
+ pair_inst->Alpha.Arg[i].Swizzle = swz;
+ break;
+ }
+ }
+ }
+ pair_inst->RGB.Opcode = RC_OPCODE_NOP;
+ pair_inst->RGB.DestIndex = 0;
+ pair_inst->RGB.WriteMask = 0;
+ pair_inst->RGB.Target = 0;
+ pair_inst->RGB.OutputWriteMask = 0;
+ pair_inst->RGB.DepthWriteMask = 0;
+ pair_inst->RGB.Saturate = 0;
+ memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
+
+ for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
+ struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
+ rgb_to_alpha_remap(reader.Inst, reader.U.Arg,
+ RC_FILE_TEMPORARY, old_swz, new_index);
+ }
+ return 1;
+}
+
/**
* Find a good ALU instruction or pair of ALU instruction and emit it.
*
@@ -536,24 +761,16 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
{
struct schedule_instruction * sinst;
- if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
- if (s->ReadyFullALU) {
- sinst = s->ReadyFullALU;
- s->ReadyFullALU = s->ReadyFullALU->NextReady;
- } else if (s->ReadyRGB) {
- sinst = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
- } else {
- sinst = s->ReadyAlpha;
- s->ReadyAlpha = s->ReadyAlpha->NextReady;
- }
-
+ if (s->ReadyFullALU) {
+ sinst = s->ReadyFullALU;
+ s->ReadyFullALU = s->ReadyFullALU->NextReady;
rc_insert_instruction(before->Prev, sinst->Instruction);
commit_alu_instruction(s, sinst);
} else {
struct schedule_instruction **prgb;
struct schedule_instruction **palpha;
-
+ struct schedule_instruction *prev;
+pair:
/* Some pairings might fail because they require too
* many source slots; try all possible pairings if necessary */
for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
@@ -572,10 +789,43 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
goto success;
}
}
-
- /* No success in pairing; just take the first RGB instruction */
- sinst = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
+ prev = NULL;
+ /* No success in pairing, now try to convert one of the RGB
+ * instructions to an Alpha so we can pair it with another RGB.
+ */
+ if (s->ReadyRGB && s->ReadyRGB->NextReady) {
+ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+ if ((*prgb)->NumWriteValues == 1) {
+ struct schedule_instruction * prgb_next;
+ if (!convert_rgb_to_alpha(s, *prgb))
+ goto cont_loop;
+ prgb_next = (*prgb)->NextReady;
+ /* Add instruction to the Alpha ready list. */
+ (*prgb)->NextReady = s->ReadyAlpha;
+ s->ReadyAlpha = *prgb;
+ /* Remove instruction from the RGB ready list.*/
+ if (prev)
+ prev->NextReady = prgb_next;
+ else
+ s->ReadyRGB = prgb_next;
+ goto pair;
+ }
+cont_loop:
+ prev = *prgb;
+ }
+ }
+ /* Still no success in pairing, just take the first RGB
+ * or alpha instruction. */
+ if (s->ReadyRGB) {
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ } else if (s->ReadyAlpha) {
+ sinst = s->ReadyAlpha;
+ s->ReadyAlpha = s->ReadyAlpha->NextReady;
+ } else {
+ /*XXX Something real bad has happened. */
+ assert(0);
+ }
rc_insert_instruction(before->Prev, sinst->Instruction);
commit_alu_instruction(s, sinst);
@@ -591,13 +841,13 @@ static void scan_read(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan)
{
struct schedule_state * s = data;
- struct reg_value * v = get_reg_value(s, file, index, chan);
+ struct reg_value ** v = get_reg_valuep(s, file, index, chan);
struct reg_value_reader * reader;
if (!v)
return;
- if (v->Writer == s->Current) {
+ if (*v && (*v)->Writer == s->Current) {
/* The instruction reads and writes to a register component.
* In this case, we only want to increment dependencies by one.
*/
@@ -608,16 +858,28 @@ static void scan_read(void * data, struct rc_instruction * inst,
reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
reader->Reader = s->Current;
- reader->Next = v->Readers;
- v->Readers = reader;
- v->NumReaders++;
-
- s->Current->NumDependencies++;
+ if (!*v) {
+ /* In this situation, the instruction reads from a register
+ * that hasn't been written to or read from in the current
+ * block. */
+ *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
+ memset(*v, 0, sizeof(struct reg_value));
+ (*v)->Readers = reader;
+ } else {
+ reader->Next = (*v)->Readers;
+ (*v)->Readers = reader;
+ /* Only update the current instruction's dependencies if the
+ * register it reads from has been written to in this block. */
+ if ((*v)->Writer) {
+ s->Current->NumDependencies++;
+ }
+ }
+ (*v)->NumReaders++;
if (s->Current->NumReadValues >= 12) {
rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
} else {
- s->Current->ReadValues[s->Current->NumReadValues++] = v;
+ s->Current->ReadValues[s->Current->NumReadValues++] = *v;
}
}
@@ -652,6 +914,16 @@ static void scan_write(void * data, struct rc_instruction * inst,
}
}
+static void is_rgb_to_alpha_possible_normal(
+ void * userdata,
+ struct rc_instruction * inst,
+ struct rc_src_register * src)
+{
+ struct rc_reader_data * reader_data = userdata;
+ reader_data->Abort = 1;
+
+}
+
static void schedule_block(struct r300_fragment_program_compiler * c,
struct rc_instruction * begin, struct rc_instruction * end)
{
@@ -683,6 +955,11 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
if (!s.Current->NumDependencies)
instruction_ready(&s, s.Current);
+
+ /* Get global readers for possible RGB->Alpha conversion. */
+ rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
+ is_rgb_to_alpha_possible_normal,
+ is_rgb_to_alpha_possible, NULL);
}
/* Temporarily unlink all instructions */
@@ -711,8 +988,13 @@ static int is_controlflow(struct rc_instruction * inst)
void rc_pair_schedule(struct radeon_compiler *cc, void *user)
{
+ struct schedule_state s;
+
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
while(inst != &c->Base.Program.Instructions) {
struct rc_instruction * first;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index c549be5218..fc05366f50 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -280,9 +280,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
pair->RGB.DestIndex = inst->DstReg.Index;
pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
}
+
if (needalpha) {
- pair->Alpha.DestIndex = inst->DstReg.Index;
pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ if (pair->Alpha.WriteMask) {
+ pair->Alpha.DestIndex = inst->DstReg.Index;
+ }
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index 24b685fbeb..fe5756ebc4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
/**
@@ -70,58 +71,98 @@ void rc_local_transform(
}
}
+struct get_used_temporaries_data {
+ unsigned char * Used;
+ unsigned int UsedLength;
+};
+
+static void get_used_temporaries_cb(
+ void * userdata,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
+{
+ struct get_used_temporaries_data * d = userdata;
+
+ if (file != RC_FILE_TEMPORARY)
+ return;
+
+ if (index >= d->UsedLength)
+ return;
+
+ d->Used[index] |= mask;
+}
+
/**
- * Left multiplication of a register with a swizzle
+ * This function fills in the parameter 'used' with a writemask that
+ * represent which components of each temporary register are used by the
+ * program. This is meant to be combined with rc_find_free_temporary_list as a
+ * more efficient version of rc_find_free_temporary.
+ * @param used The function does not initialize this parameter.
*/
-struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+void rc_get_used_temporaries(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length)
+{
+ struct rc_instruction * inst;
+ struct get_used_temporaries_data d;
+ d.Used = used;
+ d.UsedLength = used_length;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d);
+ rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d);
+ }
+}
+
+/* Search a list of used temporaries for a free one
+ * \sa rc_get_used_temporaries
+ * @note If this functions finds a free temporary, it will mark it as used
+ * in the used temporary list (param 'used')
+ * @param used list of used temporaries
+ * @param used_length number of items in param 'used'
+ * @param mask which components must be free in the temporary index that is
+ * returned.
+ * @return -1 If there are no more free temporaries, otherwise the index of
+ * a temporary register where the components specified in param 'mask' are
+ * not being used.
+ */
+int rc_find_free_temporary_list(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length,
+ unsigned int mask)
{
- struct rc_src_register tmp = srcreg;
int i;
- tmp.Swizzle = 0;
- tmp.Negate = 0;
- for(i = 0; i < 4; ++i) {
- rc_swizzle swz = GET_SWZ(swizzle, i);
- if (swz < 4) {
- tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
- tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
- } else {
- tmp.Swizzle |= swz << (i*3);
+ for(i = 0; i < used_length; i++) {
+ if ((~used[i] & mask) == mask) {
+ used[i] |= mask;
+ return i;
}
}
- return tmp;
+ return -1;
}
unsigned int rc_find_free_temporary(struct radeon_compiler * c)
{
- char used[RC_REGISTER_MAX_INDEX];
- unsigned int i;
- struct rc_instruction * rcinst;
+ unsigned char used[RC_REGISTER_MAX_INDEX];
+ int free;
memset(used, 0, sizeof(used));
- for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
- const struct rc_sub_instruction *inst = &rcinst->U.I;
- const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
- unsigned int k;
-
- for (k = 0; k < opcode->NumSrcRegs; k++) {
- if (inst->SrcReg[k].File == RC_FILE_TEMPORARY)
- used[inst->SrcReg[k].Index] = 1;
- }
-
- if (opcode->HasDstReg) {
- if (inst->DstReg.File == RC_FILE_TEMPORARY)
- used[inst->DstReg.Index] = 1;
- }
- }
+ rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX);
- for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
- if (!used[i])
- return i;
+ free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX,
+ RC_MASK_XYZW);
+ if (free < 0) {
+ rc_error(c, "Ran out of temporary registers\n");
+ return 0;
}
-
- rc_error(c, "Ran out of temporary registers\n");
- return 0;
+ return free;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index f0a77d7b53..df6c94b35f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -159,47 +159,6 @@ struct rc_program {
struct rc_constant_list Constants;
};
-static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
-{
- if (idx & 0x4)
- return idx;
- return GET_SWZ(swz, idx);
-}
-
-static inline unsigned int combine_swizzles4(unsigned int src,
- rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
-{
- unsigned int ret = 0;
-
- ret |= get_swz(src, swz_x);
- ret |= get_swz(src, swz_y) << 3;
- ret |= get_swz(src, swz_z) << 6;
- ret |= get_swz(src, swz_w) << 9;
-
- return ret;
-}
-
-static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz)
-{
- unsigned int ret = 0;
-
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
-
- return ret;
-}
-
-struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
-
-static inline void reset_srcreg(struct rc_src_register* reg)
-{
- memset(reg, 0, sizeof(struct rc_src_register));
- reg->Swizzle = RC_SWIZZLE_XYZW;
-}
-
-
/**
* A transformation that can be passed to \ref rc_local_transform.
*
@@ -222,6 +181,17 @@ void rc_local_transform(
struct radeon_compiler *c,
void *user);
+void rc_get_used_temporaries(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length);
+
+int rc_find_free_temporary_list(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length,
+ unsigned int mask);
+
unsigned int rc_find_free_temporary(struct radeon_compiler * c);
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
@@ -233,4 +203,5 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c);
void rc_print_program(const struct rc_program *prog);
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 39408845d5..c8063171b8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -36,6 +36,7 @@
#include "radeon_program_alu.h"
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
static struct rc_instruction *emit1(
@@ -84,16 +85,6 @@ static struct rc_instruction *emit3(
return fpi;
}
-static struct rc_dst_register dstreg(int file, int index)
-{
- struct rc_dst_register dst;
- dst.File = file;
- dst.Index = index;
- dst.WriteMask = RC_MASK_XYZW;
- dst.RelAddr = 0;
- return dst;
-}
-
static struct rc_dst_register dstregtmpmask(int index, int mask)
{
struct rc_dst_register dst = {0};
@@ -186,6 +177,38 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
return swizzle_smear(reg, RC_SWIZZLE_W);
}
+static int is_dst_safe_to_reuse(struct rc_instruction *inst)
+{
+ const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ assert(info->HasDstReg);
+
+ if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
+ return 0;
+
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index)
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ unsigned tmp;
+
+ if (is_dst_safe_to_reuse(inst))
+ tmp = inst->U.I.DstReg.Index;
+ else
+ tmp = rc_find_free_temporary(c);
+
+ return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
+}
+
static void transform_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -209,10 +232,26 @@ static void transform_CEIL(struct radeon_compiler* c,
* ceil(x) = x+frac(-x)
*/
- int tempreg = rc_find_free_temporary(c);
- emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]));
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
- inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg));
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
+ rc_remove_instruction(inst);
+}
+
+static void transform_CLAMP(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* CLAMP dst, src, min, max
+ * into:
+ * MIN tmp, src, max
+ * MAX dst, tmp, min
+ */
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
+ inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
+ emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
@@ -258,10 +297,10 @@ static void transform_DST(struct radeon_compiler* c,
static void transform_FLR(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
- emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
- inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
rc_remove_instruction(inst);
}
@@ -351,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c,
static void transform_LRP(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg),
+ dst,
inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
inst->U.I.DstReg,
- inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]);
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
rc_remove_instruction(inst);
}
@@ -366,9 +405,8 @@ static void transform_LRP(struct radeon_compiler* c,
static void transform_POW(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
- struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg);
- struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg);
+ struct rc_dst_register tempdst = try_to_reuse_dst(c, inst);
+ struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index);
tempdst.WriteMask = RC_MASK_W;
tempsrc.Swizzle = RC_SWIZZLE_WWWW;
@@ -388,11 +426,11 @@ static void transform_RSQ(struct radeon_compiler* c,
static void transform_SEQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one);
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -407,11 +445,11 @@ static void transform_SFL(struct radeon_compiler* c,
static void transform_SGE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -419,11 +457,11 @@ static void transform_SGE(struct radeon_compiler* c,
static void transform_SGT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -431,11 +469,11 @@ static void transform_SGT(struct radeon_compiler* c,
static void transform_SLE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -443,11 +481,11 @@ static void transform_SLE(struct radeon_compiler* c,
static void transform_SLT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -455,11 +493,11 @@ static void transform_SLT(struct radeon_compiler* c,
static void transform_SNE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero);
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -473,12 +511,13 @@ static void transform_SSG(struct radeon_compiler* c,
* CMP tmp1, x, 1, 0
* ADD result, tmp0, -tmp1;
*/
- unsigned tmp0, tmp1;
+ struct rc_dst_register dst0;
+ unsigned tmp1;
/* 0 < x */
- tmp0 = rc_find_free_temporary(c);
+ dst0 = try_to_reuse_dst(c, inst);
emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
- dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ dst0,
negate(inst->U.I.SrcReg[0]),
builtin_one,
builtin_zero);
@@ -495,7 +534,7 @@ static void transform_SSG(struct radeon_compiler* c,
/* result = tmp0 - tmp1 */
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tmp0),
+ srcreg(RC_FILE_TEMPORARY, dst0.Index),
negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
rc_remove_instruction(inst);
@@ -517,15 +556,15 @@ static void transform_SWZ(struct radeon_compiler* c,
static void transform_XPD(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
- negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
rc_remove_instruction(inst);
}
@@ -553,6 +592,7 @@ int radeonTransformALU(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
@@ -592,7 +632,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
{
/* There is no decent CMP available, so let's rig one up.
* CMP is defined as dst = src0 < 0.0 ? src1 : src2
- * The following sequence consumes two temps and two extra slots
+ * The following sequence consumes zero to two temps and two extra slots
* (the second temp and the second slot is consumed by transform_LRP),
* but should be equivalent:
*
@@ -600,18 +640,18 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
* LRP dst, tmp0, src1, src2
*
* Yes, I know, I'm a mad scientist. ~ C. & M. */
- int tempreg0 = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
/* SLT tmp0, src0, 0.0 */
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg0),
+ dst,
inst->U.I.SrcReg[0], builtin_zero);
/* LRP dst, tmp0, src1, src2 */
transform_LRP(c,
emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
+ srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
rc_remove_instruction(inst);
}
@@ -642,24 +682,25 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c,
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
unsigned constant_swizzle;
int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
0.0000000000000000001,
&constant_swizzle);
/* MOV dst, src */
+ dst.WriteMask = RC_MASK_XYZW;
emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg),
+ dst,
inst->U.I.SrcReg[0]);
- /* MAX dst.z, src, 0.00...001 */
+ /* MAX dst.y, src, 0.00...001 */
emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
- dstregtmpmask(tempreg, RC_MASK_Y),
- srcreg(RC_FILE_TEMPORARY, tempreg),
+ dstregtmpmask(dst.Index, RC_MASK_Y),
+ srcreg(RC_FILE_TEMPORARY, dst.Index),
srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
- inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
+ inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
}
static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
@@ -743,12 +784,13 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
* SLT tmp1, x, 0;
* ADD result, tmp0, -tmp1;
*/
- unsigned tmp0, tmp1;
+ struct rc_dst_register dst0 = try_to_reuse_dst(c, inst);
+ unsigned tmp1;
/* 0 < x */
- tmp0 = rc_find_free_temporary(c);
+ dst0 = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
- dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ dst0,
builtin_zero,
inst->U.I.SrcReg[0]);
@@ -763,7 +805,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
/* result = tmp0 - tmp1 */
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tmp0),
+ srcreg(RC_FILE_TEMPORARY, dst0.Index),
negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
rc_remove_instruction(inst);
@@ -781,6 +823,7 @@ int r300_transform_vertex_alu(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
index 9dcd44c522..45f79ece5b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -181,4 +181,9 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
return 0;
}
}
+
+#define RC_SOURCE_NONE 0x0
+#define RC_SOURCE_RGB 0x1
+#define RC_SOURCE_ALPHA 0x2
+
#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index a21fe8d3df..5905d26e52 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -27,6 +27,9 @@
#include "radeon_program_pair.h"
+#include "radeon_compiler_util.h"
+
+#include <stdlib.h>
/**
* Return the source slot where we installed the given register access,
@@ -204,24 +207,37 @@ void rc_pair_foreach_source_that_rgb_reads(
}
}
-/*return 0 for rgb, 1 for alpha -1 for error. */
-
-unsigned int rc_source_type_that_arg_reads(
- unsigned int source,
- unsigned int swizzle)
+struct rc_pair_instruction_source * rc_pair_get_src(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_arg * arg)
{
- unsigned int chan;
- unsigned int swz = RC_SWIZZLE_UNUSED;
- unsigned int ret = RC_PAIR_SOURCE_NONE;
-
- for(chan = 0; chan < 3; chan++) {
- swz = GET_SWZ(swizzle, chan);
- if (swz == RC_SWIZZLE_W) {
- ret |= RC_PAIR_SOURCE_ALPHA;
- } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
- || swz == RC_SWIZZLE_Z) {
- ret |= RC_PAIR_SOURCE_RGB;
+ unsigned int i, type;
+ unsigned int channels = 0;
+
+ for(i = 0; i < 3; i++) {
+ if (arg == pair_inst->RGB.Arg + i) {
+ channels = 3;
+ break;
}
}
- return ret;
+
+ if (channels == 0) {
+ for (i = 0; i < 3; i++) {
+ if (arg == pair_inst->Alpha.Arg + i) {
+ channels = 1;
+ break;
+ }
+ }
+ }
+
+ assert(channels > 0);
+ type = rc_source_type_swz(arg->Swizzle, channels);
+
+ if (type & RC_SOURCE_RGB) {
+ return &pair_inst->RGB.Src[arg->Source];
+ } else if (type & RC_SOURCE_ALPHA) {
+ return &pair_inst->Alpha.Src[arg->Source];
+ } else {
+ return NULL;
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 54d44a2098..ccf7a0070c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -55,10 +55,6 @@ struct radeon_compiler;
*/
#define RC_PAIR_PRESUB_SRC 3
-#define RC_PAIR_SOURCE_NONE 0x0
-#define RC_PAIR_SOURCE_RGB 0x1
-#define RC_PAIR_SOURCE_ALPHA 0x2
-
struct rc_pair_instruction_source {
unsigned int Used:1;
unsigned int File:3;
@@ -115,9 +111,9 @@ void rc_pair_foreach_source_that_rgb_reads(
void * data,
rc_pair_foreach_src_fn cb);
-unsigned int rc_source_type_that_arg_reads(
- unsigned int source,
- unsigned int swizzle);
+struct rc_pair_instruction_source * rc_pair_get_src(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_arg * arg);
/*@}*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
index 618ab5a099..ae13f6742f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -129,6 +129,7 @@ static char rc_swizzle_char(unsigned int swz)
case RC_SWIZZLE_HALF: return 'H';
case RC_SWIZZLE_UNUSED: return '_';
}
+ fprintf(stderr, "bad swz: %u\n", swz);
return '?';
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index 530afa5e08..f9d9f34b6a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -28,6 +28,8 @@
#include "radeon_program_tex.h"
+#include "radeon_compiler_util.h"
+
/* Series of transformations to be done on textures. */
static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
index 5f67f536f6..7d76585a59 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
@@ -87,8 +87,9 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
rc_for_all_reads_src(inst, mark_used, &d);
}
- /* Pass 2: If there is relative addressing, mark all externals as used. */
- if (has_rel_addr) {
+ /* Pass 2: If there is relative addressing or dead constant elimination
+ * is disabled, mark all externals as used. */
+ if (has_rel_addr || !c->remove_unused_constants) {
for (unsigned i = 0; i < c->Program.Constants.Count; i++)
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
const_used[i] = 1;
@@ -119,7 +120,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
/* is_identity ==> new_count == old_count
* !is_identity ==> new_count < old_count */
assert( is_identity || new_count < c->Program.Constants.Count);
- assert(!(has_rel_addr && are_externals_remapped));
+ assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));
/* Pass 4: Redirect reads of all constants to their new locations. */
if (!is_identity) {
@@ -127,7 +128,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
inst != &c->Program.Instructions; inst = inst->Next) {
rc_remap_registers(inst, remap_regs, inv_remap_table);
}
-
}
/* Set the new constant count. Note that new_count may be less than
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
index 60e228be5b..88165f7895 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
@@ -33,100 +33,51 @@
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
-
-struct reg_rename {
- int old_index;
- int new_index;
- int temp_index;
-};
-
-static void rename_reg(void * data, struct rc_instruction * inst,
- rc_register_file * file, unsigned int * index)
-{
- struct reg_rename *r = data;
-
- if(r->old_index == *index && *file == RC_FILE_TEMPORARY) {
- *index = r->new_index;
- }
- else if(r->new_index == *index && *file == RC_FILE_TEMPORARY) {
- *index = r->temp_index;
- }
-}
-
-static void rename_all(
- struct radeon_compiler *c,
- struct rc_instruction * start,
- unsigned int old,
- unsigned int new,
- unsigned int temp)
-{
- struct rc_instruction * inst;
- struct reg_rename r;
- r.old_index = old;
- r.new_index = new;
- r.temp_index = temp;
- for(inst = start; inst != &c->Program.Instructions;
- inst = inst->Next) {
- rc_remap_registers(inst, rename_reg, &r);
- }
-}
+#include "radeon_program.h"
/**
* This function renames registers in an attempt to get the code close to
* SSA form. After this function has completed, most of the register are only
- * written to one time, with a few exceptions. For example, this block of code
- * will not be modified by this function:
- * Mov Temp[0].x Const[0].x
- * Mov Temp[0].y Const[0].y
- * Basically, destination registers will be renamed if:
- * 1. There have been no previous writes to that register
- * or
- * 2. If the instruction is writting to the exact components (no more, no less)
- * of a register that has been written to by previous instructions.
+ * written to one time, with a few exceptions.
*
* This function assumes all the instructions are still of type
* RC_INSTRUCTION_NORMAL.
*/
void rc_rename_regs(struct radeon_compiler *c, void *user)
{
- unsigned int cur_index = 0;
- unsigned int icount;
+ unsigned int i, used_length;
+ int new_index;
struct rc_instruction * inst;
- unsigned int * masks;
+ struct rc_reader_data reader_data;
+ unsigned char * used;
- /* The number of instructions in the program is also the maximum
- * number of temp registers that could potentially be used. */
- icount = rc_recompute_ips(c);
- masks = memory_pool_malloc(&c->Pool, icount * sizeof(unsigned int));
- memset(masks, 0, icount * sizeof(unsigned int));
+ used_length = 2 * rc_recompute_ips(c);
+ used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);
+ memset(used, 0, sizeof(unsigned char) * used_length);
+ rc_get_used_temporaries(c, used, used_length);
for(inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
- const struct rc_opcode_info * info;
- unsigned int old_index, temp_index;
- struct rc_dst_register * dst;
- if(inst->Type != RC_INSTRUCTION_NORMAL) {
- rc_error(c, "%s only works with normal instructions.",
- __FUNCTION__);
- return;
- }
- dst = &inst->U.I.DstReg;
- info = rc_get_opcode_info(inst->U.I.Opcode);
- if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) {
+
+ if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
continue;
+
+ rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+
+ if (reader_data.Abort || reader_data.ReaderCount == 0)
+ continue;
+
+ new_index = rc_find_free_temporary_list(c, used, used_length,
+ RC_MASK_XYZW);
+ if (new_index < 0) {
+ rc_error(c, "Ran out of temporary registers\n");
+ return;
}
- if(dst->Index >= icount || !masks[dst->Index] ||
- masks[dst->Index] == dst->WriteMask) {
- old_index = dst->Index;
- /* We need to set dst->Index here so get free temporary
- * will work. */
- dst->Index = cur_index++;
- temp_index = rc_find_free_temporary(c);
- rename_all(c, inst->Next, old_index,
- dst->Index, temp_index);
+
+ reader_data.Writer->U.I.DstReg.Index = new_index;
+ for(i = 0; i < reader_data.ReaderCount; i++) {
+ reader_data.Readers[i].U.Src->Index = new_index;
}
- assert(dst->Index < icount);
- masks[dst->Index] |= dst->WriteMask;
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index 0116c5d2fa..ed9955b05d 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -428,6 +428,7 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
radeonTexObjPtr t;
uint32_t pitch_val;
uint32_t internalFormat, type, format;
+ gl_format texFormat;
type = GL_BGRA;
format = GL_UNSIGNED_BYTE;
@@ -467,9 +468,6 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
radeon_miptree_unreference(&t->mt);
radeon_miptree_unreference(&rImage->mt);
- _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
- rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
- texImage->RowStride = rb->pitch / rb->cpp;
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
@@ -481,22 +479,35 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
pitch_val = rb->pitch;
switch (rb->cpp) {
case 4:
- if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
+ if (texture_format == __DRI_TEXTURE_FORMAT_RGB) {
+ texFormat = MESA_FORMAT_RGB888;
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
- else
+ }
+ else {
+ texFormat = MESA_FORMAT_ARGB8888;
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+ }
pitch_val /= 4;
break;
case 3:
default:
+ texFormat = MESA_FORMAT_RGB888;
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
pitch_val /= 4;
break;
case 2:
+ texFormat = MESA_FORMAT_RGB565;
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
pitch_val /= 2;
break;
}
+
+ _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+ rb->base.Width, rb->base.Height, 1, 0,
+ rb->cpp, texFormat);
+ texImage->RowStride = rb->pitch / rb->cpp;
+
+
pitch_val--;
t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
| ((R300_TX_HEIGHTMASK_MASK & ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT))));