summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile1
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c28
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c11
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c16
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c66
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c61
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h16
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c252
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h28
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c14
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c29
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c11
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c197
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c245
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c15
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c28
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c28
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h15
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c5
26 files changed, 732 insertions, 363 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index d0eb170784..51b896ae91 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -8,6 +8,7 @@ LIBNAME = r300compiler
C_SOURCES = \
radeon_code.c \
radeon_compiler.c \
+ radeon_compiler_util.c \
radeon_emulate_branches.c \
radeon_emulate_loops.c \
radeon_program.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
index 847857b142..2b4bce1c08 100755
--- a/src/mesa/drivers/dri/r300/compiler/SConscript
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -12,6 +12,7 @@ r300compiler = env.ConvenienceLibrary(
source = [
'radeon_code.c',
'radeon_compiler.c',
+ 'radeon_compiler_util.c',
'radeon_program.c',
'radeon_program_print.c',
'radeon_opcodes.c',
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 4f13e51bcc..8be32ea91f 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -133,6 +133,8 @@ static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler
*/
static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
{
+ int ip;
+ int j;
PROG_CODE;
if (code->alu.length >= c->Base.max_alu_insts) {
@@ -140,20 +142,20 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
return 0;
}
- int ip = code->alu.length++;
- int j;
+ ip = code->alu.length++;
code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
unsigned int src = use_source(code, inst->RGB.Src[j]);
+ unsigned int arg;
code->alu.inst[ip].rgb_addr |= src << (6*j);
src = use_source(code, inst->Alpha.Src[j]);
code->alu.inst[ip].alpha_addr |= src << (6*j);
- unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+ arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
code->alu.inst[ip].rgb_inst |= arg << (7*j);
@@ -259,6 +261,10 @@ static int finish_node(struct r300_emit_state * emit)
{
struct r300_fragment_program_compiler * c = emit->compiler;
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
+ unsigned alu_offset;
+ unsigned alu_end;
+ unsigned tex_offset;
+ unsigned tex_end;
if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
@@ -268,10 +274,10 @@ static int finish_node(struct r300_emit_state * emit)
return 0;
}
- unsigned alu_offset = emit->node_first_alu;
- unsigned alu_end = code->alu.length - alu_offset - 1;
- unsigned tex_offset = emit->node_first_tex;
- unsigned tex_end = code->tex.length - tex_offset - 1;
+ alu_offset = emit->node_first_alu;
+ alu_end = code->alu.length - alu_offset - 1;
+ tex_offset = emit->node_first_tex;
+ tex_end = code->tex.length - tex_offset - 1;
if (code->tex.length == emit->node_first_tex) {
if (emit->current_node > 0) {
@@ -334,6 +340,9 @@ static int begin_tex(struct r300_emit_state * emit)
static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
{
+ unsigned int unit;
+ unsigned int dest;
+ unsigned int opcode;
PROG_CODE;
if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
@@ -341,9 +350,8 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
return 0;
}
- unsigned int unit = inst->U.I.TexSrcUnit;
- unsigned int dest = inst->U.I.DstReg.Index;
- unsigned int opcode;
+ unit = inst->U.I.TexSrcUnit;
+ dest = inst->U.I.DstReg.Index;
switch(inst->U.I.Opcode) {
case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index caa48fe478..2d28b06539 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -94,6 +94,9 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
*/
static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
+ unsigned int relevant;
+ int j;
+
if (reg.Abs)
reg.Negate = RC_MASK_NONE;
@@ -101,8 +104,6 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
opcode == RC_OPCODE_TEX ||
opcode == RC_OPCODE_TXB ||
opcode == RC_OPCODE_TXP) {
- int j;
-
if (reg.Abs || reg.Negate)
return 0;
@@ -117,8 +118,7 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
return 1;
}
- unsigned int relevant = 0;
- int j;
+ relevant = 0;
for(j = 0; j < 3; ++j)
if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
@@ -154,9 +154,10 @@ static void r300_swizzle_split(
unsigned int matchcount = 0;
unsigned int matchmask = 0;
for(comp = 0; comp < 3; ++comp) {
+ unsigned int swz;
if (!GET_BIT(mask, comp))
continue;
- unsigned int swz = GET_SWZ(src.Swizzle, comp);
+ swz = GET_SWZ(src.Swizzle, comp);
if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz == GET_SWZ(sd->hash, comp)) {
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 4793f33577..2f130198d3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -145,8 +145,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL},
{"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL},
- {"dump machine code", 0, is_r500 && c->Base.Debug, r500FragmentProgramDump, NULL},
- {"dump machine code", 0, !is_r500 && c->Base.Debug, r300FragmentProgramDump, NULL},
+ {"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL},
+ {"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL},
{NULL, 0, 0, NULL, NULL}
};
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 08785716db..bf8341f017 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -392,9 +392,9 @@ static void ei_if(struct r300_vertex_program_compiler * compiler,
* don't already have one. */
if (!compiler->PredicateMask) {
unsigned int writemasks[RC_REGISTER_MAX_INDEX];
- memset(writemasks, 0, sizeof(writemasks));
struct rc_instruction * inst;
unsigned int i;
+ memset(writemasks, 0, sizeof(writemasks));
for(inst = compiler->Base.Program.Instructions.Next;
inst != &compiler->Base.Program.Instructions;
inst = inst->Next) {
@@ -1067,7 +1067,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, 1, translate_vertex_program, NULL},
- {"dump machine code", 0,c->Base.Debug,r300_vertex_program_dump, NULL},
+ {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL},
{NULL, 0, 0, NULL, NULL}
};
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 627ce374ef..289bb87ae5 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -39,10 +39,12 @@ int r500_transform_IF(
struct rc_instruction * inst,
void* data)
{
+ struct rc_instruction * inst_mov;
+
if (inst->U.I.Opcode != RC_OPCODE_IF)
return 0;
- struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.WriteMask = 0;
inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
@@ -251,12 +253,11 @@ void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r500_fragment_program_code *code = &compiler->code->code.r500;
- fprintf(stderr, "R500 Fragment Program:\n--------\n");
-
int n, i;
uint32_t inst;
uint32_t inst0;
char *str = NULL;
+ fprintf(stderr, "R500 Fragment Program:\n--------\n");
for (n = 0; n < code->inst_end+1; n++) {
inst0 = inst = code->inst[n].inst0;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index bad1684696..6f101c68eb 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -227,6 +227,7 @@ static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
*/
static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
{
+ int ip;
PROG_CODE;
if (code->inst_end >= c->Base.max_alu_insts-1) {
@@ -234,7 +235,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
return;
}
- int ip = ++code->inst_end;
+ ip = ++code->inst_end;
/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
@@ -250,7 +251,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
if (inst->WriteALUResult) {
- error("%s: cannot write output and ALU result at the same time");
+ error("Cannot write output and ALU result at the same time");
return;
}
} else {
@@ -357,6 +358,7 @@ static unsigned int translate_strq_swizzle(unsigned int swizzle)
*/
static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
{
+ int ip;
PROG_CODE;
if (code->inst_end >= c->Base.max_alu_insts-1) {
@@ -364,7 +366,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
return 0;
}
- int ip = ++code->inst_end;
+ ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_TEX
| (inst->DstReg.WriteMask << 11)
@@ -407,12 +409,14 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
{
+ unsigned int newip;
+
if (s->Code->inst_end >= s->C->max_alu_insts-1) {
rc_error(s->C, "emit_tex: Too many instructions");
return;
}
- unsigned int newip = ++s->Code->inst_end;
+ newip = ++s->Code->inst_end;
/* Currently all loops use the same integer constant to intialize
* the loop variables. */
@@ -623,6 +627,8 @@ void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
if (code->inst_end == -1 ||
(code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+ int ip;
+
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= compiler->Base.max_alu_insts-1) {
@@ -630,7 +636,7 @@ void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
return;
}
- int ip = ++code->inst_end;
+ ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index b410b2daf4..4286baed0c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -26,7 +26,9 @@
#include <stdio.h>
#include <stdlib.h>
+#include "radeon_dataflow.h"
#include "radeon_program.h"
+#include "radeon_program_pair.h"
void rc_init(struct radeon_compiler * c)
@@ -50,7 +52,7 @@ void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
{
va_list ap;
- if (!c->Debug)
+ if (!(c->Debug & RC_DBG_LOG))
return;
va_start(ap, fmt);
@@ -84,7 +86,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...)
}
}
- if (c->Debug) {
+ if (c->Debug & RC_DBG_LOG) {
fprintf(stderr, "r300compiler error: ");
va_start(ap, fmt);
@@ -351,11 +353,65 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
}
}
+/**
+ * Register-access callback that tracks the highest temporary register index
+ * seen so far.
+ *
+ * @param userdata Pointer to the unsigned int holding the running maximum.
+ * @param file Register file of the access; only RC_FILE_TEMPORARY is counted.
+ * @param index Register index of the access.
+ *
+ * inst and mask are part of the callback signature but are not used here.
+ */
+static void reg_count_callback(void * userdata, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	unsigned int * highest = userdata;
+
+	if (file != RC_FILE_TEMPORARY)
+		return;
+
+	if (index > *highest)
+		*highest = index;
+}
+
+/**
+ * Print a short summary of the program held by c to stderr: instruction
+ * totals split into RGB, alpha, flow control and texture instructions, the
+ * number of presubtract operations and the number of temporary registers
+ * (highest temporary index that is read, plus one).
+ *
+ * Programs with fewer than four instructions are not reported.
+ */
+static void print_stats(struct radeon_compiler * c)
+{
+	struct rc_instruction * inst;
+	unsigned highest_temp = 0;
+	unsigned num_insts = 0;
+	unsigned num_fc = 0;
+	unsigned num_tex = 0;
+	unsigned num_alpha = 0;
+	unsigned num_rgb = 0;
+	unsigned num_presub = 0;
+
+	for (inst = c->Program.Instructions.Next;
+			inst != &c->Program.Instructions;
+			inst = inst->Next) {
+		const struct rc_opcode_info * info;
+
+		/* Only register reads feed the temporary-register count. */
+		rc_for_all_reads_mask(inst, reg_count_callback, &highest_temp);
+
+		if (inst->Type != RC_INSTRUCTION_NORMAL) {
+			if (inst->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
+				num_presub++;
+			if (inst->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+				num_presub++;
+			/* Assuming alpha will never be a flow control or
+			 * a tex instruction, the RGB opcode alone is used
+			 * for the classification below. */
+			if (inst->U.P.Alpha.Opcode != RC_OPCODE_NOP)
+				num_alpha++;
+			if (inst->U.P.RGB.Opcode != RC_OPCODE_NOP)
+				num_rgb++;
+			info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
+		} else {
+			if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE)
+				num_presub++;
+			info = rc_get_opcode_info(inst->U.I.Opcode);
+		}
+
+		if (info->IsFlowControl)
+			num_fc++;
+		if (info->HasTexture)
+			num_tex++;
+		num_insts++;
+	}
+
+	/* Very small programs are not worth reporting. */
+	if (num_insts < 4)
+		return;
+
+	fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
+		"~%4u Instructions\n"
+		"~%4u Vector Instructions (RGB)\n"
+		"~%4u Scalar Instructions (Alpha)\n"
+		"~%4u Flow Control Instructions\n"
+		"~%4u Texture Instructions\n"
+		"~%4u Presub Operations\n"
+		"~%4u Temporary Registers\n"
+		"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+		num_insts, num_rgb, num_alpha, num_fc, num_tex, num_presub,
+		highest_temp + 1);
+}
+
/* Executes a list of compiler passes given in the parameter 'list'. */
void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list,
const char *shader_name)
{
- if (c->Debug) {
+ if (c->Debug & RC_DBG_LOG) {
fprintf(stderr, "%s: before compilation\n", shader_name);
rc_print_program(&c->Program);
}
@@ -367,12 +423,14 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis
if (c->Error)
return;
- if (c->Debug && list[i].dump) {
+ if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name);
rc_print_program(&c->Program);
}
}
}
+ if (c->Debug & RC_DBG_STATS)
+ print_stats(c);
}
void rc_validate_final_shader(struct radeon_compiler *c, void *user)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 6d96ac9fdd..31fd469a04 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -30,12 +30,15 @@
#include "radeon_program.h"
#include "radeon_emulate_loops.h"
+#define RC_DBG_LOG (1 << 0)
+#define RC_DBG_STATS (1 << 1)
+
struct rc_swizzle_caps;
struct radeon_compiler {
struct memory_pool Pool;
struct rc_program Program;
- unsigned Debug:1;
+ unsigned Debug:2;
unsigned Error:1;
char * ErrorMsg;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
new file mode 100644
index 0000000000..97f4c75849
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_compiler_util.h"
+
+/**
+ * Convert a swizzle into a channel mask.
+ *
+ * Each of the four swizzle slots contributes the bit (1 << selector); bits
+ * outside RC_MASK_XYZW are discarded from the result.
+ *
+ * @param swz Packed swizzle, decoded slot by slot with GET_SWZ().
+ * @return Mask of the channels selected by swz, limited to RC_MASK_XYZW.
+ */
+unsigned int rc_swizzle_to_writemask(unsigned int swz)
+{
+	unsigned int chan;
+	unsigned int bits = 0;
+
+	for (chan = 0; chan < 4; ++chan)
+		bits |= 1 << GET_SWZ(swz, chan);
+
+	return bits & RC_MASK_XYZW;
+}
+
+/**
+ * Work out which channels of a destination write are read by a source
+ * operand.
+ *
+ * @return RC_MASK_NONE when the source and destination differ in register
+ * file or index; otherwise the channels of dst_mask that are also selected
+ * by the source swizzle src_swz.
+ */
+unsigned int rc_src_reads_dst_mask(
+		rc_register_file src_file,
+		unsigned int src_idx,
+		unsigned int src_swz,
+		rc_register_file dst_file,
+		unsigned int dst_idx,
+		unsigned int dst_mask)
+{
+	if (src_file == dst_file && src_idx == dst_idx)
+		return dst_mask & rc_swizzle_to_writemask(src_swz);
+
+	return RC_MASK_NONE;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
new file mode 100644
index 0000000000..1a14e7cb0e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
@@ -0,0 +1,16 @@
+#ifndef RADEON_COMPILER_UTIL_H
+#define RADEON_COMPILER_UTIL_H
+
+/* Kept inside the include guard so that the guard wraps the whole header. */
+#include "radeon_program_constants.h"
+
+/**
+ * Return the mask of channels selected by the four slots of the swizzle
+ * swz, limited to RC_MASK_XYZW.
+ */
+unsigned int rc_swizzle_to_writemask(unsigned int swz);
+
+/**
+ * Return the channels of dst_mask that are read by a source operand, or
+ * RC_MASK_NONE when the source and the destination are different registers
+ * (different register file or index).
+ */
+unsigned int rc_src_reads_dst_mask(
+		rc_register_file src_file,
+		unsigned int src_idx,
+		unsigned int src_swz,
+		rc_register_file dst_file,
+		unsigned int dst_idx,
+		unsigned int dst_mask);
+
+#endif /* RADEON_COMPILER_UTIL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index a27d395587..5927498818 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
@@ -27,6 +28,8 @@
#include "radeon_dataflow.h"
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_program.h"
struct read_write_mask_data {
@@ -402,3 +405,252 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v
else
remap_pair_instruction(inst, cb, userdata);
}
+
+/**
+ * Classify inst as a flow control instruction or not.
+ *
+ * For pair instructions only the RGB opcode is examined.
+ *
+ * @return RC_OPCODE_NOP if inst is not a flow control instruction.
+ * @return The opcode of inst if it is a flow control instruction.
+ */
+static rc_opcode get_flow_control_inst(struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opinfo;
+
+	if (inst->Type != RC_INSTRUCTION_NORMAL) {
+		opinfo = rc_get_opcode_info(inst->U.P.RGB.Opcode);
+		/* A flow control instruction shouldn't have an alpha
+		 * instruction. */
+		assert(!opinfo->IsFlowControl ||
+			inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
+	} else {
+		opinfo = rc_get_opcode_info(inst->U.I.Opcode);
+	}
+
+	if (!opinfo->IsFlowControl)
+		return RC_OPCODE_NOP;
+
+	return opinfo->Opcode;
+}
+
+struct get_readers_callback_data {
+ struct radeon_compiler * C; /* Compiler; its Pool is used to grow the reader list. */
+ struct rc_reader_data * ReaderData; /* Search state and results shared with the caller. */
+ rc_read_src_fn ReadCB; /* Caller hook run for each source that passes the reader checks. */
+ rc_read_write_mask_fn WriteCB; /* Caller hook run for every register write that is visited. */
+ unsigned int AliveWriteMask; /* Channels of the writer's destination still considered live. */
+};
+
+/**
+ * Append one entry to the reader list in data, growing the list from pool
+ * when necessary.
+ *
+ * @param inst Instruction that contains the reading source.
+ * @param mask Mask stored in the new entry's WriteMask field.
+ * @param src Source operand that performs the read.
+ */
+static void add_reader(
+		struct memory_pool * pool,
+		struct rc_reader_data * data,
+		struct rc_instruction * inst,
+		unsigned int mask,
+		struct rc_src_register * src)
+{
+	struct rc_reader * entry;
+
+	memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
+			data->ReaderCount, data->ReadersReserved, 1);
+
+	entry = &data->Readers[data->ReaderCount];
+	data->ReaderCount++;
+
+	entry->Inst = inst;
+	entry->WriteMask = mask;
+	entry->Src = src;
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine whether inst
+ * is a reader of userdata->ReaderData->Writer, and to record it if so.
+ *
+ * ReaderData->Abort is set when the source uses relative addressing, when
+ * AbortOnRead is already set and the source reads the writer's register, or
+ * when the source reads channels of that register that are no longer alive.
+ *
+ * @param userdata Pointer to a struct get_readers_callback_data.
+ * @param inst Instruction that owns src.
+ * @param src Source operand being examined.
+ */
+static void get_readers_normal_read_callback(
+		void * userdata,
+		struct rc_instruction * inst,
+		struct rc_src_register * src)
+{
+	struct get_readers_callback_data * d = userdata;
+	unsigned int read_mask;
+	/* Declared at the top of the block, like the rest of this code. */
+	unsigned int shared_mask;
+
+	/* Relative addressing flags the search as aborted; the source is
+	 * still processed below, exactly as before. */
+	if (src->RelAddr)
+		d->ReaderData->Abort = 1;
+
+	shared_mask = rc_src_reads_dst_mask(src->File, src->Index,
+				src->Swizzle,
+				d->ReaderData->Writer->U.I.DstReg.File,
+				d->ReaderData->Writer->U.I.DstReg.Index,
+				d->AliveWriteMask);
+
+	if (shared_mask == RC_MASK_NONE)
+		return;
+
+	/* If we make it this far, it means that this source reads from the
+	 * same register written to by d->ReaderData->Writer. */
+
+	if (d->ReaderData->AbortOnRead) {
+		d->ReaderData->Abort = 1;
+		return;
+	}
+
+	read_mask = rc_swizzle_to_writemask(src->Swizzle);
+	/* The source also reads channels that are not (or no longer)
+	 * provided by the writer.
+	 * XXX The behavior in this case should be configurable. */
+	if ((read_mask & d->AliveWriteMask) != read_mask) {
+		d->ReaderData->Abort = 1;
+		return;
+	}
+
+	/* Let the caller inspect the candidate; it may veto by setting
+	 * Abort. */
+	d->ReadCB(d->ReaderData, inst, src);
+	if (d->ReaderData->Abort)
+		return;
+
+	add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, src);
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine when
+ * userdata->ReaderData->Writer is dead (i.e. all components of its
+ * destination register have been overwritten by other instructions).
+ *
+ * The caller's WriteCB is invoked for every write, whether or not it
+ * touches the writer's destination register.
+ */
+static void get_readers_write_callback(
+		void *userdata,
+		struct rc_instruction * inst,
+		rc_register_file file,
+		unsigned int index,
+		unsigned int mask)
+{
+	struct get_readers_callback_data * d = userdata;
+	struct rc_reader_data * rd = d->ReaderData;
+	struct rc_instruction * writer = rd->Writer;
+
+	if (file == writer->U.I.DstReg.File
+			&& index == writer->U.I.DstReg.Index) {
+		unsigned int overlap = mask & writer->U.I.DstReg.WriteMask;
+
+		if (!rd->InElse) {
+			/* These channels now hold a newer value. */
+			d->AliveWriteMask &= ~overlap;
+		} else if (overlap & d->AliveWriteMask) {
+			/* We set AbortOnRead here because the destination
+			 * register of the writer is written to in both the
+			 * IF and the ELSE block of this IF/ELSE statement.
+			 * This means that readers of this destination
+			 * register that follow this IF/ELSE statement use
+			 * the value of different instructions depending on
+			 * the control flow decisions made by the program. */
+			rd->AbortOnRead = 1;
+		}
+	}
+
+	d->WriteCB(rd, inst, file, index, mask);
+}
+
+/**
+ * This function will create a list of readers via the rc_reader_data struct.
+ * This function will abort (set the flag data->Abort) and return if it
+ * encounters an instruction that reads from @param writer and also a different
+ * instruction. Here are some examples:
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0].xy, TEMP[1].xy
+ * 1 MOV TEMP[0].zw, TEMP[2].xy
+ * 2 MOV TEMP[3], TEMP[0]
+ * The Abort flag will be set on instruction 2, because it reads values written
+ * by instructions 0 and 1.
+ *
+ * writer = instruction 1;
+ * 0 IF TEMP[0].x
+ * 1 MOV TEMP[1], TEMP[2]
+ * 2 ELSE
+ * 3 MOV TEMP[1], TEMP[2]
+ * 4 ENDIF
+ * 5 MOV TEMP[3], TEMP[1]
+ * The Abort flag will be set on instruction 5, because it could read from the
+ * value written by either instruction 1 or 3, depending on the jump decision
+ * made at instruction 0.
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0], TEMP[1]
+ * 2 BGNLOOP
+ * 3 ADD TEMP[0], TEMP[0], none.1
+ * 4 ENDLOOP
+ * The Abort flag will be set on instruction 3, because in the first iteration
+ * of the loop it reads the value written by instruction 0 and in all other
+ * iterations it reads the value written by instruction 3.
+ *
+ * @param read_cb This function will be called for every instruction that
+ * has been determined to be a reader of writer.
+ * @param write_cb This function will be called for every instruction after
+ * writer.
+ */
+void rc_get_readers_normal(
+ struct radeon_compiler * c,
+ struct rc_instruction * writer,
+ struct rc_reader_data * data,
+ rc_read_src_fn read_cb,
+ rc_read_write_mask_fn write_cb)
+{
+ struct rc_instruction * tmp;
+ struct get_readers_callback_data d;
+ unsigned int branch_depth = 0; /* IFs opened after writer and not yet closed */
+
+ data->Writer = writer; /* reset the caller's result structure */
+ data->Abort = 0;
+ data->AbortOnRead = 0;
+ data->InElse = 0;
+ data->ReaderCount = 0;
+ data->ReadersReserved = 0;
+ data->Readers = NULL;
+
+ d.C = c;
+ d.AliveWriteMask = writer->U.I.DstReg.WriteMask; /* all written channels start out alive */
+ d.ReaderData = data;
+ d.ReadCB = read_cb;
+ d.WriteCB = write_cb;
+
+ if (!writer->U.I.DstReg.WriteMask) /* writer writes no channels: no readers to find */
+ return;
+
+ for(tmp = writer->Next; tmp != &c->Program.Instructions;
+ tmp = tmp->Next){
+ rc_opcode opcode = get_flow_control_inst(tmp);
+ switch(opcode) {
+ case RC_OPCODE_BGNLOOP: /* falls through: shares the abort below with ENDLOOP */
+ /* XXX We can do better when we see a BGNLOOP if we
+ * add a flag called AbortOnWrite to struct
+ * rc_reader_data and leave it set until the next
+ * ENDLOOP. */
+ case RC_OPCODE_ENDLOOP:
+ /* XXX We can do better when we see an ENDLOOP by
+ * searching backwards from writer and looking for
+ * readers of writer's destination index. If we find a
+ * reader before we get to the BGNLOOP, we must abort
+ * unless there is another writer between that reader
+ * and the BGNLOOP. */
+ data->Abort = 1; /* loops are not analysed: give up */
+ return;
+ case RC_OPCODE_IF:
+ branch_depth++;
+ break;
+ case RC_OPCODE_ELSE:
+ if (branch_depth == 0) /* ELSE of an IF that was opened before writer */
+ data->InElse = 1;
+ break;
+ case RC_OPCODE_ENDIF:
+ if (branch_depth == 0) { /* leaving the IF/ELSE that was opened before writer */
+ data->AbortOnRead = 1; /* any later read of writer's register aborts */
+ data->InElse = 0;
+ }
+ else {
+ branch_depth--;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (!data->InElse) /* sources inside that ELSE block are not examined */
+ rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d);
+ rc_for_all_writes_mask(tmp, get_readers_write_callback, &d); /* reads are checked before this instruction's own writes */
+
+ if (data->Abort)
+ return;
+
+ if (!d.AliveWriteMask) /* every channel has been overwritten: stop scanning */
+ return;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index d10ae3c7b7..7de6b98f76 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
@@ -35,6 +36,7 @@ struct rc_instruction;
struct rc_swizzle_caps;
struct rc_src_register;
struct rc_pair_instruction_arg;
+struct radeon_compiler; /* compiler type taken by rc_get_readers_normal() */
/**
@@ -66,6 +68,32 @@ typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * in
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
/*@}*/
+struct rc_reader {
+ struct rc_instruction * Inst; /* Instruction that contains the reading source. */
+ unsigned int WriteMask; /* Channels of the writer's destination that this source reads. */
+ struct rc_src_register * Src; /* The source operand that performs the read. */
+};
+
+struct rc_reader_data {
+ unsigned int Abort; /* Set to 1 when the reader search gives up. */
+ unsigned int AbortOnRead; /* Once set, the next read of the writer's register sets Abort. */
+ unsigned int InElse; /* Set while the scan is inside the ELSE of an IF opened before Writer. */
+ struct rc_instruction * Writer; /* Instruction whose readers are being collected. */
+
+ unsigned int ReaderCount; /* Number of valid entries in Readers. */
+ unsigned int ReadersReserved; /* Reservation counter handed to memory_pool_array_reserve(). */
+ struct rc_reader * Readers; /* Reader list; NULL until the first reader is added. */
+
+ void * CbData; /* NOTE(review): not touched by rc_get_readers_normal(); presumably private data for the callbacks - confirm. */
+};
+
+/**
+ * Build the list of readers of the value written by writer and store it,
+ * together with the Abort flag, in data.
+ *
+ * @param read_cb Called for every source determined to be a reader of writer.
+ * @param write_cb Called for every register write visited after writer.
+ */
+void rc_get_readers_normal(
+		struct radeon_compiler * c,
+		struct rc_instruction * writer,
+		struct rc_reader_data * data,
+		/*XXX: These should be their own function types. */
+		rc_read_src_fn read_cb,
+		rc_read_write_mask_fn write_cb);
/**
* Compiler passes based on dataflow analysis.
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index 9d17b4772a..87906f37b1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -106,10 +106,12 @@ static void push_loop(struct deadcode_state * s)
static void push_branch(struct deadcode_state * s)
{
+ struct branchinfo * branch;
+
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
s->BranchStackSize, s->BranchStackReserved, 1);
- struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++];
+ branch = &s->BranchStack[s->BranchStackSize++];
branch->HaveElse = 0;
memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
}
@@ -152,6 +154,7 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
struct instruction_state * insts = &s->Instructions[inst->IP];
unsigned int usedmask = 0;
+ unsigned int srcmasks[3];
if (opcode->HasDstReg) {
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
@@ -180,7 +183,6 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction
}
}
- unsigned int srcmasks[3];
rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
@@ -219,6 +221,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
unsigned int nr_instructions;
unsigned has_temp_reladdr_src = 0;
rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
+ unsigned int ip;
/* Give up if there is relative addressing of destination operands. */
for(struct rc_instruction * inst = c->Program.Instructions.Next;
@@ -349,12 +352,14 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
update_instruction(&s, inst);
}
- unsigned int ip = 0;
+ ip = 0;
for(struct rc_instruction * inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions;
inst = inst->Next, ++ip) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
int dead = 1;
+ unsigned int srcmasks[3];
+ unsigned int usemask;
if (!opcode->HasDstReg) {
dead = 0;
@@ -376,8 +381,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
continue;
}
- unsigned int srcmasks[3];
- unsigned int usemask = s.Instructions[ip].WriteMask;
+ usemask = s.Instructions[ip].WriteMask;
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
usemask |= RC_MASK_X;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
index 222e5b7e02..7bede344f3 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
@@ -57,18 +57,21 @@ struct emulate_branch_state {
static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
{
+ struct branch_info * branch;
+ struct rc_instruction * inst_mov;
+
memory_pool_array_reserve(&s->C->Pool, struct branch_info,
s->Branches, s->BranchCount, s->BranchReserved, 1);
DBG("%s\n", __FUNCTION__);
- struct branch_info * branch = &s->Branches[s->BranchCount++];
+ branch = &s->Branches[s->BranchCount++];
memset(branch, 0, sizeof(struct branch_info));
branch->If = inst;
/* Make a safety copy of the decision register, because we will need
* it at ENDIF time and it might be overwritten in both branches. */
- struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
+ inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
@@ -84,6 +87,8 @@ static void handle_if(struct emulate_branch_state * s, struct rc_instruction * i
static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
{
+ struct branch_info * branch;
+
if (!s->BranchCount) {
rc_error(s->C, "Encountered ELSE outside of branches");
return;
@@ -91,7 +96,7 @@ static void handle_else(struct emulate_branch_state * s, struct rc_instruction *
DBG("%s\n", __FUNCTION__);
- struct branch_info * branch = &s->Branches[s->BranchCount - 1];
+ branch = &s->Branches[s->BranchCount - 1];
branch->Else = inst;
}
@@ -191,6 +196,10 @@ static void inject_cmp(struct emulate_branch_state * s,
static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
{
+ struct branch_info * branch;
+ struct register_proxies IfProxies;
+ struct register_proxies ElseProxies;
+
if (!s->BranchCount) {
rc_error(s->C, "Encountered ENDIF outside of branches");
return;
@@ -198,9 +207,7 @@ static void handle_endif(struct emulate_branch_state * s, struct rc_instruction
DBG("%s\n", __FUNCTION__);
- struct branch_info * branch = &s->Branches[s->BranchCount - 1];
- struct register_proxies IfProxies;
- struct register_proxies ElseProxies;
+ branch = &s->Branches[s->BranchCount - 1];
memset(&IfProxies, 0, sizeof(IfProxies));
memset(&ElseProxies, 0, sizeof(ElseProxies));
@@ -261,16 +268,19 @@ static void remap_output_function(void * userdata, struct rc_instruction * inst,
*/
static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
{
+ const struct rc_opcode_info * opcode;
+
if (!s->BranchCount)
return;
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ opcode = rc_get_opcode_info(inst->U.I.Opcode);
if (!opcode->HasDstReg)
return;
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
struct remap_output_data remap;
+ struct rc_instruction * inst_mov;
remap.Output = inst->U.I.DstReg.Index;
remap.Temporary = rc_find_free_temporary(s->C);
@@ -281,7 +291,7 @@ static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruc
rc_remap_registers(inst, &remap_output_function, &remap);
}
- struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
+ inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
inst_mov->U.I.DstReg.Index = remap.Output;
@@ -299,12 +309,13 @@ static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruc
void rc_emulate_branches(struct radeon_compiler *c, void *user)
{
struct emulate_branch_state s;
+ struct rc_instruction * ptr;
memset(&s, 0, sizeof(s));
s.C = c;
/* Untypical loop because we may remove the current instruction */
- struct rc_instruction * ptr = c->Program.Instructions.Next;
+ ptr = c->Program.Instructions.Next;
while(ptr != &c->Program.Instructions) {
struct rc_instruction * inst = ptr;
ptr = ptr->Next;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index cd4fcbabb9..205eecd112 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -386,8 +386,6 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
case RC_OPCODE_SNE:
break;
default:
- rc_error(c, "%s: expected conditional",
- __FUNCTION__);
return 0;
}
loop->Cond = loop->If->Prev;
@@ -431,8 +429,10 @@ static int transform_loop(struct emulate_loop_state * s,
loop = &s->Loops[s->LoopCount++];
- if (!build_loop_info(s->C, loop, inst))
+ if (!build_loop_info(s->C, loop, inst)) {
+ rc_error(s->C, "Failed to build loop info\n");
return 0;
+ }
if(try_unroll_loop(s->C, loop)){
return 1;
@@ -511,11 +511,12 @@ void rc_emulate_loops(struct radeon_compiler *c, void *user)
* loops are unrolled first.
*/
for( i = s->LoopCount - 1; i >= 0; i-- ){
+ unsigned int iterations;
+
if(!s->Loops[i].EndLoop){
continue;
}
- unsigned int iterations = loop_max_possible_iterations(
- s->C, &s->Loops[i]);
+ iterations = loop_max_possible_iterations(s->C, &s->Loops[i]);
unroll_loop(s->C, &s->Loops[i], iterations);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 3be50b93e4..4d9120ffd0 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
@@ -28,6 +29,7 @@
#include "radeon_dataflow.h"
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"
struct peephole_state {
@@ -86,80 +88,60 @@ struct copy_propagate_state {
int BranchDepth;
};
-/**
- * This is a callback function that is meant to be passed to
- * rc_for_all_reads_mask. This function will be called once for each source
- * register in inst.
- * @param inst The instruction that the source register belongs to.
- * @param file The register file of the source register.
- * @param index The index of the source register.
- * @param mask The components of the source register that are being read from.
- */
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int mask)
+ struct rc_src_register * src)
{
- struct copy_propagate_state * s = data;
+ rc_register_file file = src->File;
+ struct rc_reader_data * reader_data = data;
+ const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
- /* XXX This could probably be handled better. */
- if (file == RC_FILE_ADDRESS) {
- s->Conflict = 1;
+ /* It is possible to do copy propagation in this situation,
+ * just not right now, see peephole_add_presub_inv() */
+ if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
+ (info->NumSrcRegs > 2 || info->HasTexture)) {
+ reader_data->Abort = 1;
return;
}
- if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
+ /* XXX This could probably be handled better. */
+ if (file == RC_FILE_ADDRESS) {
+ reader_data->Abort = 1;
return;
+ }
/* These instructions cannot read from the constants file.
* see radeonTransformTEX()
*/
- if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
- s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+ if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+ reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
(inst->U.I.Opcode == RC_OPCODE_TEX ||
inst->U.I.Opcode == RC_OPCODE_TXB ||
inst->U.I.Opcode == RC_OPCODE_TXP ||
inst->U.I.Opcode == RC_OPCODE_KIL)){
- s->Conflict = 1;
+ reader_data->Abort = 1;
return;
}
- if ((mask & s->MovMask) == mask) {
- if (s->SourceClobbered) {
- s->Conflict = 1;
- }
- } else if ((mask & s->DefinedMask) == mask) {
- /* read from something entirely written by other instruction: this is okay */
- } else {
- /* read from component combination that is not well-defined without
- * the MOV: cannot remove it */
- s->Conflict = 1;
- }
}
static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
- struct copy_propagate_state * s = data;
-
- if (s->BranchDepth < 0)
- return;
+ struct rc_reader_data * reader_data = data;
+ struct copy_propagate_state * s = reader_data->CbData;
- if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
- s->MovMask &= ~mask;
- if (s->BranchDepth == 0)
- s->DefinedMask |= mask;
- else
- s->DefinedMask &= ~mask;
- }
- if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+ if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) {
if (mask & s->SourcedMask)
- s->SourceClobbered = 1;
+ reader_data->AbortOnRead = 1;
} else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
- s->SourceClobbered = 1;
+ reader_data->AbortOnRead = 1;
}
}
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
struct copy_propagate_state s;
+ struct rc_reader_data reader_data;
+ unsigned int i;
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
inst_mov->U.I.DstReg.RelAddr ||
@@ -173,95 +155,27 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
s.MovMask = inst_mov->U.I.DstReg.WriteMask;
s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
+ reader_data.CbData = &s;
+
for(unsigned int chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
}
- /* 1st pass: Check whether all subsequent readers can be changed */
- for(struct rc_instruction * inst = inst_mov->Next;
- inst != &c->Program.Instructions;
- inst = inst->Next) {
- const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
- /* XXX In the future we might be able to make the optimizer
- * smart enough to handle loops. */
- if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
- || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
- return;
- }
+ /* Get a list of all the readers of this MOV instruction. */
+ rc_get_readers_normal(c, inst_mov, &reader_data,
+ copy_propagate_scan_read, copy_propagate_scan_write);
- /* It is possible to do copy propigation in this situation,
- * just not right now, see peephole_add_presub_inv() */
- if (inst_mov->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
- (info->NumSrcRegs > 2 || info->HasTexture)) {
- return;
- }
-
- rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
- rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
- if (s.Conflict)
- return;
-
- if (s.BranchDepth >= 0) {
- if (inst->U.I.Opcode == RC_OPCODE_IF) {
- s.BranchDepth++;
- } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
- || inst->U.I.Opcode == RC_OPCODE_ELSE) {
- s.BranchDepth--;
- if (s.BranchDepth < 0) {
- s.DefinedMask &= ~s.MovMask;
- s.MovMask = 0;
- }
- }
- }
- }
-
- if (s.Conflict)
+ if (reader_data.Abort || reader_data.ReaderCount == 0)
return;
- /* 2nd pass: We can satisfy all readers, so switch them over all at once */
- s.MovMask = inst_mov->U.I.DstReg.WriteMask;
- s.BranchDepth = 0;
-
- for(struct rc_instruction * inst = inst_mov->Next;
- inst != &c->Program.Instructions;
- inst = inst->Next) {
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
- if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
- inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
- unsigned int refmask = 0;
-
- for(unsigned int chan = 0; chan < 4; ++chan) {
- unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
- refmask |= (1 << swz) & RC_MASK_XYZW;
- }
+ /* Propagate the MOV instruction. */
+ for (i = 0; i < reader_data.ReaderCount; i++) {
+ struct rc_instruction * inst = reader_data.Readers[i].Inst;
+ *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]);
- if ((refmask & s.MovMask) == refmask) {
- inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
- if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
- inst->U.I.PreSub = s.Mov->U.I.PreSub;
- }
- }
- }
-
- if (opcode->HasDstReg) {
- if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
- inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
- s.MovMask &= ~inst->U.I.DstReg.WriteMask;
- }
- }
-
- if (s.BranchDepth >= 0) {
- if (inst->U.I.Opcode == RC_OPCODE_IF) {
- s.BranchDepth++;
- } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
- || inst->U.I.Opcode == RC_OPCODE_ELSE) {
- s.BranchDepth--;
- if (s.BranchDepth < 0)
- break; /* no more readers after this point */
- }
- }
+ if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+ inst->U.I.PreSub = s.Mov->U.I.PreSub;
}
/* Finally, remove the original MOV instruction */
@@ -408,30 +322,38 @@ static void constant_folding_add(struct rc_instruction * inst)
static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
{
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int i;
/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ struct rc_constant * constant;
+ struct rc_src_register newsrc;
+ int have_real_reference;
+
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
inst->U.I.SrcReg[src].RelAddr ||
inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
continue;
- struct rc_constant * constant =
+ constant =
&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
if (constant->Type != RC_CONSTANT_IMMEDIATE)
continue;
- struct rc_src_register newsrc = inst->U.I.SrcReg[src];
- int have_real_reference = 0;
+ newsrc = inst->U.I.SrcReg[src];
+ have_real_reference = 0;
for(unsigned int chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+ unsigned int newswz;
+ float imm;
+ float baseimm;
+
if (swz >= 4)
continue;
- unsigned int newswz;
- float imm = constant->u.Immediate[swz];
- float baseimm = imm;
+ imm = constant->u.Immediate[swz];
+ baseimm = imm;
if (imm < 0.0)
baseimm = -baseimm;
@@ -473,6 +395,13 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
constant_folding_mul(inst);
else if (inst->U.I.Opcode == RC_OPCODE_ADD)
constant_folding_add(inst);
+
+ /* In case this instruction has been converted, make sure all of the
+ * registers that are no longer used are empty. */
+ opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ for(i = opcode->NumSrcRegs; i < 3; i++) {
+ memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
+ }
}
/**
@@ -482,18 +411,10 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
static unsigned int src_reads_dst_mask(struct rc_src_register src,
struct rc_dst_register dst)
{
- unsigned int mask = 0;
- unsigned int i;
if (dst.File != src.File || dst.Index != src.Index) {
return 0;
}
-
- for(i = 0; i < 4; i++) {
- mask |= 1 << GET_SWZ(src.Swizzle, i);
- }
- mask &= RC_MASK_XYZW;
-
- return mask;
+ return rc_swizzle_to_writemask(src.Swizzle);
}
/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index c73845512f..91524f5ec6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -322,12 +322,13 @@ void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user)
{
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
struct regalloc_state s;
+ int temp_reg_offset;
compute_live_intervals(cc, &s);
c->AllocateHwInputs(c, &alloc_input, &s);
- int temp_reg_offset = 0;
+ temp_reg_offset = 0;
for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index)
temp_reg_offset = s.Input[i].Index + 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index a33b2fde7b..d4a38607d9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -237,11 +237,12 @@ static void commit_alu_instruction(struct schedule_state * s, struct schedule_in
static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
{
struct schedule_instruction *readytex;
+ struct rc_instruction * inst_begin;
assert(s->ReadyTEX);
/* Node marker for R300 */
- struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev);
+ inst_begin = rc_insert_new_instruction(s->C, before->Prev);
inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
/* Link texture instructions back in */
@@ -274,12 +275,113 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo
}
}
+/**
+ * Helper for destructive_merge_instructions(). Merges the presubtract
+ * sources of src into dst_full and makes sure they end up in the correct
+ * spot: presubtract operand N is placed in source slot N of the selected
+ * half of dst_full, any source that previously occupied that slot is moved
+ * to another slot, and the RGB arguments of dst_full are renumbered to
+ * follow the move.
+ *
+ * This function assumes that dst_full is an rgb instruction, meaning that it
+ * has a vector instruction (rgb) but no scalar instruction (alpha).
+ *
+ * On failure dst_full may already have been partially modified.
+ *
+ * @param dst_full The instruction that receives the presubtract sources.
+ * @param src The sub instruction whose presubtract sources are merged in.
+ * @param type RC_PAIR_SOURCE_RGB or RC_PAIR_SOURCE_ALPHA; selects the half
+ * of dst_full in which the sources are placed.
+ * @return 0 if merging the presubtract sources fails.
+ * @return 1 if merging the presubtract sources succeeds.
+ */
+static int merge_presub_sources(
+	struct rc_pair_instruction * dst_full,
+	struct rc_pair_sub_instruction src,
+	unsigned int type)
+{
+	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
+	struct rc_pair_sub_instruction * dst_sub;
+	const struct rc_opcode_info * info;
+
+	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
+
+	switch(type) {
+	case RC_PAIR_SOURCE_RGB:
+		is_rgb = 1;
+		is_alpha = 0;
+		dst_sub = &dst_full->RGB;
+		break;
+	case RC_PAIR_SOURCE_ALPHA:
+		is_rgb = 0;
+		is_alpha = 1;
+		dst_sub = &dst_full->Alpha;
+		break;
+	default:
+		assert(0);
+		return 0;
+	}
+
+	info = rc_get_opcode_info(dst_full->RGB.Opcode);
+
+	/* The destination already uses a presubtract operation. */
+	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
+		return 0;
+
+	srcp_regs = rc_presubtract_src_reg_count(
+				src.Src[RC_PAIR_PRESUB_SRC].Index);
+	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
+		unsigned int arg;
+		int free_source;
+		unsigned int one_way = 0;
+		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
+		struct rc_pair_instruction_source temp;
+
+		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
+							srcp.File, srcp.Index);
+
+		/* If free_source < 0 then there are no free source
+		 * slots. */
+		if (free_source < 0)
+			return 0;
+
+		temp = dst_sub->Src[srcp_src];
+		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
+
+		/* srcp needs src0 and src1 to be the same */
+		if (free_source < srcp_src) {
+			if (!temp.Used)
+				continue;
+			/* The presubtract source was found in a lower slot,
+			 * so the source displaced from slot srcp_src needs a
+			 * slot of its own. */
+			free_source = rc_pair_alloc_source(dst_full, is_rgb,
+					is_alpha, temp.File, temp.Index);
+			/* No slot left for the displaced source: give up
+			 * instead of renumbering arguments to an invalid
+			 * (negative) source index below. */
+			if (free_source < 0)
+				return 0;
+			one_way = 1;
+		} else {
+			dst_sub->Src[free_source] = temp;
+		}
+
+		/* If free_source == srcp_src, then the presubtract
+		 * source is already in the correct place. */
+		if (free_source == srcp_src)
+			continue;
+
+		/* Shuffle the sources, so we can put the
+		 * presubtract source in the correct place. */
+		for(arg = 0; arg < info->NumSrcRegs; arg++) {
+			/* If this arg does not read from a source of the
+			 * requested type, do nothing. */
+			if (!(rc_source_type_that_arg_reads(
+				dst_full->RGB.Arg[arg].Source,
+				dst_full->RGB.Arg[arg].Swizzle) & type)) {
+				continue;
+			}
+			if (dst_full->RGB.Arg[arg].Source == srcp_src)
+				dst_full->RGB.Arg[arg].Source = free_source;
+			/* We need to do this just in case register
+			 * is one of the sources already, but in the
+			 * wrong spot. */
+			else if(dst_full->RGB.Arg[arg].Source == free_source
+							&& !one_way) {
+				dst_full->RGB.Arg[arg].Source = srcp_src;
+			}
+		}
+	}
+	return 1;
+}
+
+/* This function assumes that rgb.Alpha and alpha.RGB are unused */
static int destructive_merge_instructions(
struct rc_pair_instruction * rgb,
struct rc_pair_instruction * alpha)
{
const struct rc_opcode_info * opcode;
+
assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
@@ -288,130 +390,15 @@ static int destructive_merge_instructions(
* src1. */
/* Merge the rgb presubtract registers. */
- const struct rc_opcode_info * rgb_info =
- rc_get_opcode_info(rgb->RGB.Opcode);
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
- unsigned int srcp_src;
- unsigned int srcp_regs;
- if (rgb->RGB.Src[RC_PAIR_PRESUB_SRC].Used)
+ if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) {
return 0;
- srcp_regs = rc_presubtract_src_reg_count(
- alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
- for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
- unsigned int arg;
- int free_source;
- unsigned int one_way = 0;
- struct rc_pair_instruction_source srcp =
- alpha->RGB.Src[srcp_src];
- struct rc_pair_instruction_source temp;
- /* 2nd arg of 1 means this is an rgb source.
- * 3rd arg of 0 means this is not an alpha source. */
- free_source = rc_pair_alloc_source(rgb, 1, 0,
- srcp.File, srcp.Index);
- /* If free_source < 0 then there are no free source
- * slots. */
- if (free_source < 0)
- return 0;
-
- temp = rgb->RGB.Src[srcp_src];
- rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
- /* srcp needs src0 and src1 to be the same */
- if (free_source < srcp_src) {
- if (!temp.Used)
- continue;
- free_source = rc_pair_alloc_source(rgb, 1, 0,
- srcp.File, srcp.Index);
- one_way = 1;
- } else {
- rgb->RGB.Src[free_source] = temp;
- }
- /* If free_source == srcp_src, then the presubtract
- * source is already in the correct place. */
- if (free_source == srcp_src)
- continue;
- /* Shuffle the sources, so we can put the
- * presubtract source in the correct place. */
- for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
- /*If this arg does not read from an rgb source,
- * do nothing. */
- if (rc_source_type_that_arg_reads(
- rgb->RGB.Arg[arg].Source,
- rgb->RGB.Arg[arg].Swizzle, 3)
- != RC_PAIR_SOURCE_RGB) {
- continue;
- }
- if (rgb->RGB.Arg[arg].Source == srcp_src)
- rgb->RGB.Arg[arg].Source = free_source;
- /* We need to do this just in case register
- * is one of the sources already, but in the
- * wrong spot. */
- else if(rgb->RGB.Arg[arg].Source == free_source
- && !one_way) {
- rgb->RGB.Arg[arg].Source = srcp_src;
- }
- }
}
}
-
/* Merge the alpha presubtract registers */
if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
- unsigned int srcp_src;
- unsigned int srcp_regs;
- if(rgb->Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+ if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){
return 0;
-
- srcp_regs = rc_presubtract_src_reg_count(
- alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
- for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
- unsigned int arg;
- int free_source;
- unsigned int one_way = 0;
- struct rc_pair_instruction_source srcp =
- alpha->Alpha.Src[srcp_src];
- struct rc_pair_instruction_source temp;
- /* 2nd arg of 0 means this is not an rgb source.
- * 3rd arg of 1 means this is an alpha source. */
- free_source = rc_pair_alloc_source(rgb, 0, 1,
- srcp.File, srcp.Index);
- /* If free_source < 0 then there are no free source
- * slots. */
- if (free_source < 0)
- return 0;
-
- temp = rgb->Alpha.Src[srcp_src];
- rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
- /* srcp needs src0 and src1 to be the same. */
- if (free_source < srcp_src) {
- if (!temp.Used)
- continue;
- free_source = rc_pair_alloc_source(rgb, 0, 1,
- temp.File, temp.Index);
- one_way = 1;
- } else {
- rgb->Alpha.Src[free_source] = temp;
- }
- /* If free_source == srcp_src, then the presubtract
- * source is already in the correct place. */
- if (free_source == srcp_src)
- continue;
- /* Shuffle the sources, so we can put the
- * presubtract source in the correct place. */
- for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
- /*If this arg does not read from an alpha
- * source, do nothing. */
- if (rc_source_type_that_arg_reads(
- rgb->RGB.Arg[arg].Source,
- rgb->RGB.Arg[arg].Swizzle, 3)
- != RC_PAIR_SOURCE_ALPHA) {
- continue;
- }
- if (rgb->RGB.Arg[arg].Source == srcp_src)
- rgb->RGB.Arg[arg].Source = free_source;
- else if (rgb->RGB.Arg[arg].Source == free_source
- && !one_way) {
- rgb->RGB.Arg[arg].Source = srcp_src;
- }
- }
}
}
@@ -424,6 +411,7 @@ static int destructive_merge_instructions(
unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
rc_register_file file = 0;
unsigned int index = 0;
+ int source;
if (alpha->Alpha.Arg[arg].Swizzle < 3) {
srcrgb = 1;
@@ -435,7 +423,7 @@ static int destructive_merge_instructions(
index = alpha->Alpha.Src[oldsrc].Index;
}
- int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+ source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
if (source < 0)
return 0;
@@ -475,6 +463,12 @@ static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_i
{
struct rc_pair_instruction backup;
+ /*Instructions can't write output registers and ALU result at the
+ * same time. */
+ if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
+ || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
+ return 0;
+ }
memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
if (destructive_merge_instructions(rgb, alpha))
@@ -597,6 +591,7 @@ static void scan_read(void * data, struct rc_instruction * inst,
{
struct schedule_state * s = data;
struct reg_value * v = get_reg_value(s, file, index, chan);
+ struct reg_value_reader * reader;
if (!v)
return;
@@ -610,7 +605,7 @@ static void scan_read(void * data, struct rc_instruction * inst,
DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
- struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+ reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
reader->Reader = s->Current;
reader->Next = v->Readers;
v->Readers = reader;
@@ -630,13 +625,14 @@ static void scan_write(void * data, struct rc_instruction * inst,
{
struct schedule_state * s = data;
struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+ struct reg_value * newv;
if (!pv)
return;
DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
- struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
+ newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
memset(newv, 0, sizeof(*newv));
newv->Writer = s->Current;
@@ -659,12 +655,13 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
struct rc_instruction * begin, struct rc_instruction * end)
{
struct schedule_state s;
+ unsigned int ip;
memset(&s, 0, sizeof(s));
s.C = &c->Base;
/* Scan instructions for data dependencies */
- unsigned int ip = 0;
+ ip = 0;
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
memset(s.Current, 0, sizeof(struct schedule_instruction));
@@ -716,12 +713,14 @@ void rc_pair_schedule(struct radeon_compiler *cc, void *user)
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
struct rc_instruction * inst = c->Base.Program.Instructions.Next;
while(inst != &c->Base.Program.Instructions) {
+ struct rc_instruction * first;
+
if (is_controlflow(inst)) {
inst = inst->Next;
continue;
}
- struct rc_instruction * first = inst;
+ first = inst;
while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
inst = inst->Next;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index ff82584466..c549be5218 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -148,9 +148,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
struct rc_pair_instruction * pair,
struct rc_sub_instruction * inst)
{
+ int needrgb, needalpha, istranscendent;
+ const struct rc_opcode_info * opcode;
+ int i;
+
memset(pair, 0, sizeof(struct rc_pair_instruction));
- int needrgb, needalpha, istranscendent;
classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
if (needrgb) {
@@ -167,8 +170,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
pair->Alpha.Saturate = 1;
}
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
- int i;
+ opcode = rc_get_opcode_info(inst->Opcode);
/* Presubtract handling:
* We need to make sure that the values used by the presubtract
@@ -330,15 +332,18 @@ void rc_pair_translate(struct radeon_compiler *cc, void *user)
for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
inst != &c->Base.Program.Instructions;
inst = inst->Next) {
+ const struct rc_opcode_info * opcode;
+ struct rc_sub_instruction copy;
+
if (inst->Type != RC_INSTRUCTION_NORMAL)
continue;
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ opcode = rc_get_opcode_info(inst->U.I.Opcode);
if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
continue;
- struct rc_sub_instruction copy = inst->U.I;
+ copy = inst->U.I;
check_opcode_support(c, &copy);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 4d3e26f28c..39408845d5 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -874,13 +874,15 @@ int r300_transform_trig_simple(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
+ unsigned int constants[2];
+ unsigned int tempreg;
+
if (inst->U.I.Opcode != RC_OPCODE_COS &&
inst->U.I.Opcode != RC_OPCODE_SIN &&
inst->U.I.Opcode != RC_OPCODE_SCS)
return 0;
- unsigned int constants[2];
- unsigned int tempreg = rc_find_free_temporary(c);
+ tempreg = rc_find_free_temporary(c);
sincos_constants(c, constants);
@@ -918,6 +920,8 @@ int r300_transform_trig_simple(struct radeon_compiler* c,
swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
} else {
+ struct rc_dst_register dst;
+
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
swizzle_xxxx(inst->U.I.SrcReg[0]),
swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
@@ -929,7 +933,7 @@ int r300_transform_trig_simple(struct radeon_compiler* c,
swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
- struct rc_dst_register dst = inst->U.I.DstReg;
+ dst = inst->U.I.DstReg;
dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
sin_approx(c, inst, dst,
@@ -988,16 +992,16 @@ int radeonTransformTrigScale(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
- if (inst->U.I.Opcode != RC_OPCODE_COS &&
- inst->U.I.Opcode != RC_OPCODE_SIN &&
- inst->U.I.Opcode != RC_OPCODE_SCS)
- return 0;
-
static const float RCP_2PI = 0.15915494309189535;
unsigned int temp;
unsigned int constant;
unsigned int constant_swizzle;
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
+
temp = rc_find_free_temporary(c);
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
@@ -1020,6 +1024,10 @@ int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
struct rc_instruction *inst,
void *unused)
{
+ static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+ unsigned int temp;
+ unsigned int constant;
+
if (inst->U.I.Opcode != RC_OPCODE_COS &&
inst->U.I.Opcode != RC_OPCODE_SIN &&
inst->U.I.Opcode != RC_OPCODE_SCS)
@@ -1030,10 +1038,6 @@ int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
* repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
*/
- static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
- unsigned int temp;
- unsigned int constant;
-
temp = rc_find_free_temporary(c);
constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index c31efdb059..a21fe8d3df 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -206,34 +206,22 @@ void rc_pair_foreach_source_that_rgb_reads(
/*return 0 for rgb, 1 for alpha -1 for error. */
-rc_pair_source_type rc_source_type_that_arg_reads(
+unsigned int rc_source_type_that_arg_reads(
unsigned int source,
- unsigned int swizzle,
- unsigned int channels)
+ unsigned int swizzle)
{
unsigned int chan;
unsigned int swz = RC_SWIZZLE_UNUSED;
- int isRGB = 0;
- int isAlpha = 0;
- /* Find a swizzle that is either X,Y,Z,or W. We assume here
- * that if one channel swizzles X,Y, or Z, then none of the
- * other channels swizzle W, and vice-versa. */
- for(chan = 0; chan < channels; chan++) {
+ unsigned int ret = RC_PAIR_SOURCE_NONE;
+
+ for(chan = 0; chan < 3; chan++) {
swz = GET_SWZ(swizzle, chan);
if (swz == RC_SWIZZLE_W) {
- isAlpha = 1;
+ ret |= RC_PAIR_SOURCE_ALPHA;
} else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
|| swz == RC_SWIZZLE_Z) {
- isRGB = 1;
+ ret |= RC_PAIR_SOURCE_RGB;
}
}
- assert(!isRGB || !isAlpha);
-
- if(!isRGB && !isAlpha)
- return RC_PAIR_SOURCE_NONE;
-
- if (isRGB)
- return RC_PAIR_SOURCE_RGB;
- /*isAlpha*/
- return RC_PAIR_SOURCE_ALPHA;
+ return ret;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 01cdb15424..54d44a2098 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -55,6 +55,10 @@ struct radeon_compiler;
*/
#define RC_PAIR_PRESUB_SRC 3
+#define RC_PAIR_SOURCE_NONE 0x0
+#define RC_PAIR_SOURCE_RGB 0x1
+#define RC_PAIR_SOURCE_ALPHA 0x2
+
struct rc_pair_instruction_source {
unsigned int Used:1;
unsigned int File:3;
@@ -93,12 +97,6 @@ struct rc_pair_instruction {
typedef void (*rc_pair_foreach_src_fn)
(void *, struct rc_pair_instruction_source *);
-typedef enum {
- RC_PAIR_SOURCE_NONE = 0,
- RC_PAIR_SOURCE_RGB,
- RC_PAIR_SOURCE_ALPHA
-} rc_pair_source_type;
-
/**
* General helper functions for dealing with the paired instruction format.
*/
@@ -117,10 +115,9 @@ void rc_pair_foreach_source_that_rgb_reads(
void * data,
rc_pair_foreach_src_fn cb);
-rc_pair_source_type rc_source_type_that_arg_reads(
+unsigned int rc_source_type_that_arg_reads(
unsigned int source,
- unsigned int swizzle,
- unsigned int channels);
+ unsigned int swizzle);
/*@}*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
index c59161640c..5f67f536f6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
@@ -68,6 +68,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
unsigned are_externals_remapped = 0;
struct rc_constant *constants = c->Program.Constants.Constants;
struct mark_used_data d;
+ unsigned new_count;
if (!c->Program.Constants.Count) {
*out_remap_table = NULL;
@@ -97,7 +98,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
* This pass removes unused constants simply by overwriting them by other constants. */
remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
- unsigned new_count = 0;
+ new_count = 0;
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
if (const_used[i]) {
@@ -144,6 +145,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
free(const_used);
free(inv_remap_table);
- if (c->Debug)
+ if (c->Debug & RC_DBG_LOG)
rc_constants_print(&c->Program.Constants);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
index 31d25f9ab8..60e228be5b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
@@ -104,13 +104,14 @@ void rc_rename_regs(struct radeon_compiler *c, void *user)
inst != &c->Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * info;
+ unsigned int old_index, temp_index;
+ struct rc_dst_register * dst;
if(inst->Type != RC_INSTRUCTION_NORMAL) {
rc_error(c, "%s only works with normal instructions.",
__FUNCTION__);
return;
}
- unsigned int old_index, temp_index;
- struct rc_dst_register * dst = &inst->U.I.DstReg;
+ dst = &inst->U.I.DstReg;
info = rc_get_opcode_info(inst->U.I.Opcode);
if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) {
continue;