diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
26 files changed, 732 insertions, 363 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index d0eb170784..51b896ae91 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -8,6 +8,7 @@ LIBNAME = r300compiler C_SOURCES = \ radeon_code.c \ radeon_compiler.c \ + radeon_compiler_util.c \ radeon_emulate_branches.c \ radeon_emulate_loops.c \ radeon_program.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index 847857b142..2b4bce1c08 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -12,6 +12,7 @@ r300compiler = env.ConvenienceLibrary( source = [ 'radeon_code.c', 'radeon_compiler.c', + 'radeon_compiler_util.c', 'radeon_program.c', 'radeon_program_print.c', 'radeon_opcodes.c', diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 4f13e51bcc..8be32ea91f 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -133,6 +133,8 @@ static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler */ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) { + int ip; + int j; PROG_CODE; if (code->alu.length >= c->Base.max_alu_insts) { @@ -140,20 +142,20 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i return 0; } - int ip = code->alu.length++; - int j; + ip = code->alu.length++; code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { unsigned int src = use_source(code, inst->RGB.Src[j]); + unsigned int arg; code->alu.inst[ip].rgb_addr |= src << (6*j); src = use_source(code, inst->Alpha.Src[j]); code->alu.inst[ip].alpha_addr |= src << (6*j); - unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); arg |= inst->RGB.Arg[j].Abs << 6; arg |= inst->RGB.Arg[j].Negate << 5; code->alu.inst[ip].rgb_inst |= arg << (7*j); @@ -259,6 +261,10 @@ static int finish_node(struct r300_emit_state * emit) { struct r300_fragment_program_compiler * c = emit->compiler; struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; + unsigned alu_offset; + unsigned alu_end; + unsigned tex_offset; + unsigned tex_end; if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ @@ -268,10 +274,10 @@ static int finish_node(struct r300_emit_state * emit) return 0; } - unsigned alu_offset = emit->node_first_alu; - unsigned alu_end = code->alu.length - alu_offset - 1; - unsigned tex_offset = emit->node_first_tex; - unsigned tex_end = code->tex.length - tex_offset - 1; + alu_offset = emit->node_first_alu; + alu_end = code->alu.length - alu_offset - 1; + tex_offset = emit->node_first_tex; + tex_end = code->tex.length - tex_offset - 1; if (code->tex.length == emit->node_first_tex) { if (emit->current_node > 0) { @@ -334,6 +340,9 @@ static int begin_tex(struct r300_emit_state * emit) static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) { + unsigned int unit; + unsigned int dest; + unsigned int opcode; PROG_CODE; if (code->tex.length >= R300_PFS_MAX_TEX_INST) { @@ -341,9 +350,8 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) return 0; } - unsigned int unit = inst->U.I.TexSrcUnit; - unsigned int dest = inst->U.I.DstReg.Index; - unsigned int opcode; + unit = inst->U.I.TexSrcUnit; + dest = inst->U.I.DstReg.Index; switch(inst->U.I.Opcode) { case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index caa48fe478..2d28b06539 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -94,6 +94,9 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) */ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { + unsigned int relevant; + int j; + if (reg.Abs) reg.Negate = RC_MASK_NONE; @@ -101,8 +104,6 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) opcode == RC_OPCODE_TEX || opcode == RC_OPCODE_TXB || opcode == RC_OPCODE_TXP) { - int j; - if (reg.Abs || reg.Negate) return 0; @@ -117,8 +118,7 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) return 1; } - unsigned int relevant = 0; - int j; + relevant = 0; for(j = 0; j < 3; ++j) if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) @@ -154,9 +154,10 @@ static void r300_swizzle_split( unsigned int matchcount = 0; unsigned int matchmask = 0; for(comp = 0; comp < 3; ++comp) { + unsigned int swz; if (!GET_BIT(mask, comp)) continue; - unsigned int swz = GET_SWZ(src.Swizzle, comp); + swz = GET_SWZ(src.Swizzle, comp); if (swz == RC_SWIZZLE_UNUSED) continue; if (swz == GET_SWZ(sd->hash, comp)) { diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 4793f33577..2f130198d3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -145,8 +145,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, - {"dump machine code", 0, is_r500 && c->Base.Debug, r500FragmentProgramDump, NULL}, - {"dump machine code", 0, !is_r500 && c->Base.Debug, r300FragmentProgramDump, NULL}, + {"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL}, + {"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL}, {NULL, 0, 0, NULL, NULL} }; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 08785716db..bf8341f017 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -392,9 +392,9 @@ static void ei_if(struct r300_vertex_program_compiler * compiler, * don't already have one. */ if (!compiler->PredicateMask) { unsigned int writemasks[RC_REGISTER_MAX_INDEX]; - memset(writemasks, 0, sizeof(writemasks)); struct rc_instruction * inst; unsigned int i; + memset(writemasks, 0, sizeof(writemasks)); for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { @@ -1067,7 +1067,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, 1, translate_vertex_program, NULL}, - {"dump machine code", 0,c->Base.Debug,r300_vertex_program_dump, NULL}, + {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, {NULL, 0, 0, NULL, NULL} }; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 627ce374ef..289bb87ae5 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -39,10 +39,12 @@ int r500_transform_IF( struct rc_instruction * inst, void* data) { + struct rc_instruction * inst_mov; + if (inst->U.I.Opcode != RC_OPCODE_IF) return 0; - struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov = rc_insert_new_instruction(c, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.WriteMask = 0; inst_mov->U.I.WriteALUResult = RC_ALURESULT_W; @@ -251,12 +253,11 @@ void r500FragmentProgramDump(struct radeon_compiler *c, void *user) { struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; struct r500_fragment_program_code *code = &compiler->code->code.r500; - fprintf(stderr, "R500 Fragment Program:\n--------\n"); - int n, i; uint32_t inst; uint32_t inst0; char *str = NULL; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); for (n = 0; n < code->inst_end+1; n++) { inst0 = inst = code->inst[n].inst0; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index bad1684696..6f101c68eb 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -227,6 +227,7 @@ static void alu_nop(struct r300_fragment_program_compiler *c, int ip) */ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) { + int ip; PROG_CODE; if (code->inst_end >= c->Base.max_alu_insts-1) { @@ -234,7 +235,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair return; } - int ip = ++code->inst_end; + ip = ++code->inst_end; /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || @@ -250,7 +251,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { code->inst[ip].inst0 = R500_INST_TYPE_OUT; if (inst->WriteALUResult) { - error("%s: cannot write output and ALU result at the same time"); + error("Cannot write output and ALU result at the same time"); return; } } else { @@ -357,6 +358,7 @@ static unsigned int translate_strq_swizzle(unsigned int swizzle) */ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) { + int ip; PROG_CODE; if (code->inst_end >= c->Base.max_alu_insts-1) { @@ -364,7 +366,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst return 0; } - int ip = ++code->inst_end; + ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_TEX | (inst->DstReg.WriteMask << 11) @@ -407,12 +409,14 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) { + unsigned int newip; + if (s->Code->inst_end >= s->C->max_alu_insts-1) { rc_error(s->C, "emit_tex: Too many instructions"); return; } - unsigned int newip = ++s->Code->inst_end; + newip = ++s->Code->inst_end; /* Currently all loops use the same integer constant to intialize * the loop variables. */ @@ -623,6 +627,8 @@ void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) if (code->inst_end == -1 || (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + int ip; + /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is leading to a KIL */ if (code->inst_end >= compiler->Base.max_alu_insts-1) { @@ -630,7 +636,7 @@ void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) return; } - int ip = ++code->inst_end; + ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index b410b2daf4..4286baed0c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -26,7 +26,9 @@ #include <stdio.h> #include <stdlib.h> +#include "radeon_dataflow.h" #include "radeon_program.h" +#include "radeon_program_pair.h" void rc_init(struct radeon_compiler * c) @@ -50,7 +52,7 @@ void rc_debug(struct radeon_compiler * c, const char * fmt, ...) { va_list ap; - if (!c->Debug) + if (!(c->Debug & RC_DBG_LOG)) return; va_start(ap, fmt); @@ -84,7 +86,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...) } } - if (c->Debug) { + if (c->Debug & RC_DBG_LOG) { fprintf(stderr, "r300compiler error: "); va_start(ap, fmt); @@ -351,11 +353,65 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) } } +static void reg_count_callback(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned int * max_reg = userdata; + if (file == RC_FILE_TEMPORARY) + index > *max_reg ? *max_reg = index : 0; +} + +static void print_stats(struct radeon_compiler * c) +{ + struct rc_instruction * tmp; + unsigned max_reg, insts, fc, tex, alpha, rgb, presub; + max_reg = insts = fc = tex = alpha = rgb = presub = 0; + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; + tmp = tmp->Next){ + const struct rc_opcode_info * info; + rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) + presub++; + info = rc_get_opcode_info(tmp->U.I.Opcode); + } else { + if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) + presub++; + if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + presub++; + /* Assuming alpha will never be a flow control or + * a tex instruction. */ + if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) + alpha++; + if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) + rgb++; + info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); + } + if (info->IsFlowControl) + fc++; + if (info->HasTexture) + tex++; + insts++; + } + if (insts < 4) + return; + fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + insts, rgb, alpha, fc, tex, presub, max_reg + 1); +} + /* Executes a list of compiler passes given in the parameter 'list'. */ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, const char *shader_name) { - if (c->Debug) { + if (c->Debug & RC_DBG_LOG) { fprintf(stderr, "%s: before compilation\n", shader_name); rc_print_program(&c->Program); } @@ -367,12 +423,14 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis if (c->Error) return; - if (c->Debug && list[i].dump) { + if ((c->Debug & RC_DBG_LOG) && list[i].dump) { fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name); rc_print_program(&c->Program); } } } + if (c->Debug & RC_DBG_STATS) + print_stats(c); } void rc_validate_final_shader(struct radeon_compiler *c, void *user) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 6d96ac9fdd..31fd469a04 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -30,12 +30,15 @@ #include "radeon_program.h" #include "radeon_emulate_loops.h" +#define RC_DBG_LOG (1 << 0) +#define RC_DBG_STATS (1 << 1) + struct rc_swizzle_caps; struct radeon_compiler { struct memory_pool Pool; struct rc_program Program; - unsigned Debug:1; + unsigned Debug:2; unsigned Error:1; char * ErrorMsg; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c new file mode 100644 index 0000000000..97f4c75849 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -0,0 +1,61 @@ +/* + * Copyright 2010 Tom Stellard <tstellar@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_compiler_util.h" + +/** + */ +unsigned int rc_swizzle_to_writemask(unsigned int swz) +{ + unsigned int mask = 0; + unsigned int i; + + for(i = 0; i < 4; i++) { + mask |= 1 << GET_SWZ(swz, i); + } + mask &= RC_MASK_XYZW; + + return mask; +} + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask) +{ + if (src_file != dst_file || src_idx != dst_idx) { + return RC_MASK_NONE; + } + return dst_mask & rc_swizzle_to_writemask(src_swz); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h new file mode 100644 index 0000000000..1a14e7cb0e --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -0,0 +1,16 @@ +#include "radeon_program_constants.h" + +#ifndef RADEON_PROGRAM_UTIL_H +#define RADEON_PROGRAM_UTIL_H + +unsigned int rc_swizzle_to_writemask(unsigned int swz); + +unsigned int rc_src_reads_dst_mask( + rc_register_file src_file, + unsigned int src_idx, + unsigned int src_swz, + rc_register_file dst_file, + unsigned int dst_idx, + unsigned int dst_mask); + +#endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index a27d395587..5927498818 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <tstellar@gmail.com> * * All Rights Reserved. * @@ -27,6 +28,8 @@ #include "radeon_dataflow.h" +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_program.h" struct read_write_mask_data { @@ -402,3 +405,252 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v else remap_pair_instruction(inst, cb, userdata); } + +/** + * @return RC_OPCODE_NOOP if inst is not a flow control instruction. + * @return The opcode of inst if it is a flow control instruction. + */ +static rc_opcode get_flow_control_inst(struct rc_instruction * inst) +{ + const struct rc_opcode_info * info; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(inst->U.I.Opcode); + } else { + info = rc_get_opcode_info(inst->U.P.RGB.Opcode); + /*A flow control instruction shouldn't have an alpha + * instruction.*/ + assert(!info->IsFlowControl || + inst->U.P.Alpha.Opcode == RC_OPCODE_NOP); + } + + if (info->IsFlowControl) + return info->Opcode; + else + return RC_OPCODE_NOP; + +} + +struct get_readers_callback_data { + struct radeon_compiler * C; + struct rc_reader_data * ReaderData; + rc_read_src_fn ReadCB; + rc_read_write_mask_fn WriteCB; + unsigned int AliveWriteMask; +}; + +static void add_reader( + struct memory_pool * pool, + struct rc_reader_data * data, + struct rc_instruction * inst, + unsigned int mask, + struct rc_src_register * src) +{ + struct rc_reader * new; + memory_pool_array_reserve(pool, struct rc_reader, data->Readers, + data->ReaderCount, data->ReadersReserved, 1); + new = &data->Readers[data->ReaderCount++]; + new->Inst = inst; + new->WriteMask = mask; + new->Src = src; +} + +/** + * This function is used by rc_get_readers_normal() to determine whether inst + * is a reader of userdata->ReaderData->Writer + */ +static void get_readers_normal_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct get_readers_callback_data * d = userdata; + unsigned int read_mask; + + if (src->RelAddr) + d->ReaderData->Abort = 1; + + unsigned int shared_mask = rc_src_reads_dst_mask(src->File, src->Index, + src->Swizzle, + d->ReaderData->Writer->U.I.DstReg.File, + d->ReaderData->Writer->U.I.DstReg.Index, + d->AliveWriteMask); + + if (shared_mask == RC_MASK_NONE) + return; + + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ + + if (d->ReaderData->AbortOnRead) { + d->ReaderData->Abort = 1; + return; + } + + read_mask = rc_swizzle_to_writemask(src->Swizzle); + /* XXX The behavior in this case should be configurable. */ + if ((read_mask & d->AliveWriteMask) != read_mask) { + d->ReaderData->Abort = 1; + return; + } + + d->ReadCB(d->ReaderData, inst, src); + if (d->ReaderData->Abort) + return; + + add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, src); +} + +/** + * This function is used by rc_get_readers_normal() to determine when + * userdata->ReaderData->Writer is dead (i. e. All compontents of its + * destination register have been overwritten by other instructions). + */ +static void get_readers_write_callback( + void *userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_readers_callback_data * d = userdata; + + if (index == d->ReaderData->Writer->U.I.DstReg.Index + && file == d->ReaderData->Writer->U.I.DstReg.File) { + unsigned int shared_mask = mask + & d->ReaderData->Writer->U.I.DstReg.WriteMask; + if (d->ReaderData->InElse) { + if (shared_mask & d->AliveWriteMask) { + /* We set AbortOnRead here because the + * destination register of d->ReaderData->Writer + * is written to in both the IF and the + * ELSE block of this IF/ELSE statement. + * This means that readers of this + * destination register that follow this IF/ELSE + * statement use the value of different + * instructions depending on the control flow + * decisions made by the program. */ + d->ReaderData->AbortOnRead = 1; + } + } else { + d->AliveWriteMask &= ~shared_mask; + } + } + + d->WriteCB(d->ReaderData, inst, file, index, mask); +} + +/** + * This function will create a list of readers via the rc_reader_data struct. + * This function will abort (set the flag data->Abort) and return if it + * encounters an instruction that reads from @param writer and also a different + * instruction. Here are some examples: + * + * writer = instruction 0; + * 0 MOV TEMP[0].xy, TEMP[1].xy + * 1 MOV TEMP[0].zw, TEMP[2].xy + * 2 MOV TEMP[3], TEMP[0] + * The Abort flag will be set on instruction 2, because it reads values written + * by instructions 0 and 1. + * + * writer = instruction 1; + * 0 IF TEMP[0].x + * 1 MOV TEMP[1], TEMP[2] + * 2 ELSE + * 3 MOV TEMP[1], TEMP[2] + * 4 ENDIF + * 5 MOV TEMP[3], TEMP[1] + * The Abort flag will be set on instruction 5, because it could read from the + * value written by either instruction 1 or 3, depending on the jump decision + * made at instruction 0. + * + * writer = instruction 0; + * 0 MOV TEMP[0], TEMP[1] + * 2 BGNLOOP + * 3 ADD TEMP[0], TEMP[0], none.1 + * 4 ENDLOOP + * The Abort flag will be set on instruction 3, because in the first iteration + * of the loop it reads the value written by instruction 0 and in all other + * iterations it reads the value written by instruction 3. + * + * @param read_cb This function will be called for for every instruction that + * has been determined to be a reader of writer. + * @param write_cb This function will be called for every instruction after + * writer. + */ +void rc_get_readers_normal( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_reader_data * data, + rc_read_src_fn read_cb, + rc_read_write_mask_fn write_cb) +{ + struct rc_instruction * tmp; + struct get_readers_callback_data d; + unsigned int branch_depth = 0; + + data->Writer = writer; + data->Abort = 0; + data->AbortOnRead = 0; + data->InElse = 0; + data->ReaderCount = 0; + data->ReadersReserved = 0; + data->Readers = NULL; + + d.C = c; + d.AliveWriteMask = writer->U.I.DstReg.WriteMask; + d.ReaderData = data; + d.ReadCB = read_cb; + d.WriteCB = write_cb; + + if (!writer->U.I.DstReg.WriteMask) + return; + + for(tmp = writer->Next; tmp != &c->Program.Instructions; + tmp = tmp->Next){ + rc_opcode opcode = get_flow_control_inst(tmp); + switch(opcode) { + case RC_OPCODE_BGNLOOP: + /* XXX We can do better when we see a BGNLOOP if we + * add a flag called AbortOnWrite to struct + * rc_reader_data and leave it set until the next + * ENDLOOP. */ + case RC_OPCODE_ENDLOOP: + /* XXX We can do better when we see an ENDLOOP by + * searching backwards from writer and looking for + * readers of writer's destination index. If we find a + * reader before we get to the BGNLOOP, we must abort + * unless there is another writer between that reader + * and the BGNLOOP. */ + data->Abort = 1; + return; + case RC_OPCODE_IF: + branch_depth++; + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) + data->InElse = 1; + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + data->AbortOnRead = 1; + data->InElse = 0; + } + else { + branch_depth--; + } + break; + default: + break; + } + + if (!data->InElse) + rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d); + rc_for_all_writes_mask(tmp, get_readers_write_callback, &d); + + if (data->Abort) + return; + + if (!d.AliveWriteMask) + return; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index d10ae3c7b7..7de6b98f76 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <tstellar@gmail.com> * * All Rights Reserved. * @@ -35,6 +36,7 @@ struct rc_instruction; struct rc_swizzle_caps; struct rc_src_register; struct rc_pair_instruction_arg; +struct rc_compiler; /** @@ -66,6 +68,32 @@ typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * in void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); /*@}*/ +struct rc_reader { + struct rc_instruction * Inst; + unsigned int WriteMask; + struct rc_src_register * Src; +}; + +struct rc_reader_data { + unsigned int Abort; + unsigned int AbortOnRead; + unsigned int InElse; + struct rc_instruction * Writer; + + unsigned int ReaderCount; + unsigned int ReadersReserved; + struct rc_reader * Readers; + + void * CbData; +}; + +void rc_get_readers_normal( + struct radeon_compiler * c, + struct rc_instruction * inst, + struct rc_reader_data * data, + /*XXX: These should be their own function types. */ + rc_read_src_fn read_cb, + rc_read_write_mask_fn write_cb); /** * Compiler passes based on dataflow analysis. diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index 9d17b4772a..87906f37b1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -106,10 +106,12 @@ static void push_loop(struct deadcode_state * s) static void push_branch(struct deadcode_state * s) { + struct branchinfo * branch; + memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, s->BranchStackSize, s->BranchStackReserved, 1); - struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++]; + branch = &s->BranchStack[s->BranchStackSize++]; branch->HaveElse = 0; memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); } @@ -152,6 +154,7 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); struct instruction_state * insts = &s->Instructions[inst->IP]; unsigned int usedmask = 0; + unsigned int srcmasks[3]; if (opcode->HasDstReg) { unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); @@ -180,7 +183,6 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction } } - unsigned int srcmasks[3]; rc_compute_sources_for_writemask(inst, usedmask, srcmasks); for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { @@ -219,6 +221,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) unsigned int nr_instructions; unsigned has_temp_reladdr_src = 0; rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; + unsigned int ip; /* Give up if there is relative addressing of destination operands. */ for(struct rc_instruction * inst = c->Program.Instructions.Next; @@ -349,12 +352,14 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) update_instruction(&s, inst); } - unsigned int ip = 0; + ip = 0; for(struct rc_instruction * inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next, ++ip) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); int dead = 1; + unsigned int srcmasks[3]; + unsigned int usemask; if (!opcode->HasDstReg) { dead = 0; @@ -376,8 +381,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) continue; } - unsigned int srcmasks[3]; - unsigned int usemask = s.Instructions[ip].WriteMask; + usemask = s.Instructions[ip].WriteMask; if (inst->U.I.WriteALUResult == RC_ALURESULT_X) usemask |= RC_MASK_X; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c index 222e5b7e02..7bede344f3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c @@ -57,18 +57,21 @@ struct emulate_branch_state { static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) { + struct branch_info * branch; + struct rc_instruction * inst_mov; + memory_pool_array_reserve(&s->C->Pool, struct branch_info, s->Branches, s->BranchCount, s->BranchReserved, 1); DBG("%s\n", __FUNCTION__); - struct branch_info * branch = &s->Branches[s->BranchCount++]; + branch = &s->Branches[s->BranchCount++]; memset(branch, 0, sizeof(struct branch_info)); branch->If = inst; /* Make a safety copy of the decision register, because we will need * it at ENDIF time and it might be overwritten in both branches. */ - struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev); + inst_mov = rc_insert_new_instruction(s->C, inst->Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); @@ -84,6 +87,8 @@ static void handle_if(struct emulate_branch_state * s, struct rc_instruction * i static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) { + struct branch_info * branch; + if (!s->BranchCount) { rc_error(s->C, "Encountered ELSE outside of branches"); return; @@ -91,7 +96,7 @@ static void handle_else(struct emulate_branch_state * s, struct rc_instruction * DBG("%s\n", __FUNCTION__); - struct branch_info * branch = &s->Branches[s->BranchCount - 1]; + branch = &s->Branches[s->BranchCount - 1]; branch->Else = inst; } @@ -191,6 +196,10 @@ static void inject_cmp(struct emulate_branch_state * s, static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) { + struct branch_info * branch; + struct register_proxies IfProxies; + struct register_proxies ElseProxies; + if (!s->BranchCount) { rc_error(s->C, "Encountered ENDIF outside of branches"); return; @@ -198,9 +207,7 @@ static void handle_endif(struct emulate_branch_state * s, struct rc_instruction DBG("%s\n", __FUNCTION__); - struct branch_info * branch = &s->Branches[s->BranchCount - 1]; - struct register_proxies IfProxies; - struct register_proxies ElseProxies; + branch = &s->Branches[s->BranchCount - 1]; memset(&IfProxies, 0, sizeof(IfProxies)); memset(&ElseProxies, 0, sizeof(ElseProxies)); @@ -261,16 +268,19 @@ static void remap_output_function(void * userdata, struct rc_instruction * inst, */ static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) { + const struct rc_opcode_info * opcode; + if (!s->BranchCount) return; - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + opcode = rc_get_opcode_info(inst->U.I.Opcode); if (!opcode->HasDstReg) return; if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { struct remap_output_data remap; + struct rc_instruction * inst_mov; remap.Output = inst->U.I.DstReg.Index; remap.Temporary = rc_find_free_temporary(s->C); @@ -281,7 +291,7 @@ static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruc rc_remap_registers(inst, &remap_output_function, &remap); } - struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); + inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); inst_mov->U.I.Opcode = RC_OPCODE_MOV; inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; inst_mov->U.I.DstReg.Index = remap.Output; @@ -299,12 +309,13 @@ static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruc void rc_emulate_branches(struct radeon_compiler *c, void *user) { struct emulate_branch_state s; + struct rc_instruction * ptr; memset(&s, 0, sizeof(s)); s.C = c; /* Untypical loop because we may remove the current instruction */ - struct rc_instruction * ptr = c->Program.Instructions.Next; + ptr = c->Program.Instructions.Next; while(ptr != &c->Program.Instructions) { struct rc_instruction * inst = ptr; ptr = ptr->Next; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index cd4fcbabb9..205eecd112 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -386,8 +386,6 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, case RC_OPCODE_SNE: break; default: - rc_error(c, "%s: expected conditional", - __FUNCTION__); return 0; } loop->Cond = loop->If->Prev; @@ -431,8 +429,10 @@ static int transform_loop(struct emulate_loop_state * s, loop = &s->Loops[s->LoopCount++]; - if (!build_loop_info(s->C, loop, inst)) + if (!build_loop_info(s->C, loop, inst)) { + rc_error(s->C, "Failed to build loop info\n"); return 0; + } if(try_unroll_loop(s->C, loop)){ return 1; @@ -511,11 +511,12 @@ void rc_emulate_loops(struct radeon_compiler *c, void *user) * loops are unrolled first. */ for( i = s->LoopCount - 1; i >= 0; i-- ){ + unsigned int iterations; + if(!s->Loops[i].EndLoop){ continue; } - unsigned int iterations = loop_max_possible_iterations( - s->C, &s->Loops[i]); + iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); unroll_loop(s->C, &s->Loops[i], iterations); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 3be50b93e4..4d9120ffd0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <tstellar@gmail.com> * * All Rights Reserved. * @@ -28,6 +29,7 @@ #include "radeon_dataflow.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_swizzle.h" struct peephole_state { @@ -86,80 +88,60 @@ struct copy_propagate_state { int BranchDepth; }; -/** - * This is a callback function that is meant to be passed to - * rc_for_all_reads_mask. This function will be called once for each source - * register in inst. - * @param inst The instruction that the source register belongs to. - * @param file The register file of the source register. - * @param index The index of the source register. - * @param mask The components of the source register that are being read from. - */ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) + struct rc_src_register * src) { - struct copy_propagate_state * s = data; + rc_register_file file = src->File; + struct rc_reader_data * reader_data = data; + const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - /* XXX This could probably be handled better. */ - if (file == RC_FILE_ADDRESS) { - s->Conflict = 1; + /* It is possible to do copy propigation in this situation, + * just not right now, see peephole_add_presub_inv() */ + if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE && + (info->NumSrcRegs > 2 || info->HasTexture)) { + reader_data->Abort = 1; return; } - if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index) + /* XXX This could probably be handled better. */ + if (file == RC_FILE_ADDRESS) { + reader_data->Abort = 1; return; + } /* These instructions cannot read from the constants file. * see radeonTransformTEX() */ - if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && - s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT && + if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && (inst->U.I.Opcode == RC_OPCODE_TEX || inst->U.I.Opcode == RC_OPCODE_TXB || inst->U.I.Opcode == RC_OPCODE_TXP || inst->U.I.Opcode == RC_OPCODE_KIL)){ - s->Conflict = 1; + reader_data->Abort = 1; return; } - if ((mask & s->MovMask) == mask) { - if (s->SourceClobbered) { - s->Conflict = 1; - } - } else if ((mask & s->DefinedMask) == mask) { - /* read from something entirely written by other instruction: this is okay */ - } else { - /* read from component combination that is not well-defined without - * the MOV: cannot remove it */ - s->Conflict = 1; - } } static void copy_propagate_scan_write(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { - struct copy_propagate_state * s = data; - - if (s->BranchDepth < 0) - return; + struct rc_reader_data * reader_data = data; + struct copy_propagate_state * s = reader_data->CbData; - if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) { - s->MovMask &= ~mask; - if (s->BranchDepth == 0) - s->DefinedMask |= mask; - else - s->DefinedMask &= ~mask; - } - if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) { + if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) { if (mask & s->SourcedMask) - s->SourceClobbered = 1; + reader_data->AbortOnRead = 1; } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { - s->SourceClobbered = 1; + reader_data->AbortOnRead = 1; } } static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) { struct copy_propagate_state s; + struct rc_reader_data reader_data; + unsigned int i; if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.DstReg.RelAddr || @@ -173,95 +155,27 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i s.MovMask = inst_mov->U.I.DstReg.WriteMask; s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; + reader_data.CbData = &s; + for(unsigned int chan = 0; chan < 4; ++chan) { unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; } - /* 1st pass: Check whether all subsequent readers can be changed */ - for(struct rc_instruction * inst = inst_mov->Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - /* XXX In the future we might be able to make the optimizer - * smart enough to handle loops. */ - if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP - || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){ - return; - } + /* Get a list of all the readers of this MOV instruction. */ + rc_get_readers_normal(c, inst_mov, &reader_data, + copy_propagate_scan_read, copy_propagate_scan_write); - /* It is possible to do copy propigation in this situation, - * just not right now, see peephole_add_presub_inv() */ - if (inst_mov->U.I.PreSub.Opcode != RC_PRESUB_NONE && - (info->NumSrcRegs > 2 || info->HasTexture)) { - return; - } - - rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s); - rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s); - if (s.Conflict) - return; - - if (s.BranchDepth >= 0) { - if (inst->U.I.Opcode == RC_OPCODE_IF) { - s.BranchDepth++; - } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF - || inst->U.I.Opcode == RC_OPCODE_ELSE) { - s.BranchDepth--; - if (s.BranchDepth < 0) { - s.DefinedMask &= ~s.MovMask; - s.MovMask = 0; - } - } - } - } - - if (s.Conflict) + if (reader_data.Abort || reader_data.ReaderCount == 0) return; - /* 2nd pass: We can satisfy all readers, so switch them over all at once */ - s.MovMask = inst_mov->U.I.DstReg.WriteMask; - s.BranchDepth = 0; - - for(struct rc_instruction * inst = inst_mov->Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) { - unsigned int refmask = 0; - - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); - refmask |= (1 << swz) & RC_MASK_XYZW; - } + /* Propagate the MOV instruction. */ + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction * inst = reader_data.Readers[i].Inst; + *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]); - if ((refmask & s.MovMask) == refmask) { - inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]); - if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) - inst->U.I.PreSub = s.Mov->U.I.PreSub; - } - } - } - - if (opcode->HasDstReg) { - if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY && - inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) { - s.MovMask &= ~inst->U.I.DstReg.WriteMask; - } - } - - if (s.BranchDepth >= 0) { - if (inst->U.I.Opcode == RC_OPCODE_IF) { - s.BranchDepth++; - } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF - || inst->U.I.Opcode == RC_OPCODE_ELSE) { - s.BranchDepth--; - if (s.BranchDepth < 0) - break; /* no more readers after this point */ - } - } + if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = s.Mov->U.I.PreSub; } /* Finally, remove the original MOV instruction */ @@ -408,30 +322,38 @@ static void constant_folding_add(struct rc_instruction * inst) static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_constant * constant; + struct rc_src_register newsrc; + int have_real_reference; + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || inst->U.I.SrcReg[src].RelAddr || inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) continue; - struct rc_constant * constant = + constant = &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; if (constant->Type != RC_CONSTANT_IMMEDIATE) continue; - struct rc_src_register newsrc = inst->U.I.SrcReg[src]; - int have_real_reference = 0; + newsrc = inst->U.I.SrcReg[src]; + have_real_reference = 0; for(unsigned int chan = 0; chan < 4; ++chan) { unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + unsigned int newswz; + float imm; + float baseimm; + if (swz >= 4) continue; - unsigned int newswz; - float imm = constant->u.Immediate[swz]; - float baseimm = imm; + imm = constant->u.Immediate[swz]; + baseimm = imm; if (imm < 0.0) baseimm = -baseimm; @@ -473,6 +395,13 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction * constant_folding_mul(inst); else if (inst->U.I.Opcode == RC_OPCODE_ADD) constant_folding_add(inst); + + /* In case this instruction has been converted, make sure all of the + * registers that are no longer used are empty. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + for(i = opcode->NumSrcRegs; i < 3; i++) { + memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); + } } /** @@ -482,18 +411,10 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction * static unsigned int src_reads_dst_mask(struct rc_src_register src, struct rc_dst_register dst) { - unsigned int mask = 0; - unsigned int i; if (dst.File != src.File || dst.Index != src.Index) { return 0; } - - for(i = 0; i < 4; i++) { - mask |= 1 << GET_SWZ(src.Swizzle, i); - } - mask &= RC_MASK_XYZW; - - return mask; + return rc_swizzle_to_writemask(src.Swizzle); } /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index c73845512f..91524f5ec6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -322,12 +322,13 @@ void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user) { struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; struct regalloc_state s; + int temp_reg_offset; compute_live_intervals(cc, &s); c->AllocateHwInputs(c, &alloc_input, &s); - int temp_reg_offset = 0; + temp_reg_offset = 0; for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) { if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index) temp_reg_offset = s.Input[i].Index + 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index a33b2fde7b..d4a38607d9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -237,11 +237,12 @@ static void commit_alu_instruction(struct schedule_state * s, struct schedule_in static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) { struct schedule_instruction *readytex; + struct rc_instruction * inst_begin; assert(s->ReadyTEX); /* Node marker for R300 */ - struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev); + inst_begin = rc_insert_new_instruction(s->C, before->Prev); inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; /* Link texture instructions back in */ @@ -274,12 +275,113 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo } } +/* This is a helper function for destructive_merge_instructions(). It helps + * merge presubtract sources from two instructions and makes sure the + * presubtract sources end up in the correct spot. This function assumes that + * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb) + * but no scalar instruction (alpha). + * @return 0 if merging the presubtract sources fails. + * @retrun 1 if merging the presubtract sources succeeds. + */ +static int merge_presub_sources( + struct rc_pair_instruction * dst_full, + struct rc_pair_sub_instruction src, + unsigned int type) +{ + unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; + struct rc_pair_sub_instruction * dst_sub; + + assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); + + switch(type) { + case RC_PAIR_SOURCE_RGB: + is_rgb = 1; + is_alpha = 0; + dst_sub = &dst_full->RGB; + break; + case RC_PAIR_SOURCE_ALPHA: + is_rgb = 0; + is_alpha = 1; + dst_sub = &dst_full->Alpha; + break; + default: + assert(0); + return 0; + } + + const struct rc_opcode_info * info = + rc_get_opcode_info(dst_full->RGB.Opcode); + if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) + return 0; + + srcp_regs = rc_presubtract_src_reg_count( + src.Src[RC_PAIR_PRESUB_SRC].Index); + for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { + unsigned int arg; + int free_source; + unsigned int one_way = 0; + struct rc_pair_instruction_source srcp = src.Src[srcp_src]; + struct rc_pair_instruction_source temp; + + free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, + srcp.File, srcp.Index); + + /* If free_source < 0 then there are no free source + * slots. */ + if (free_source < 0) + return 0; + + temp = dst_sub->Src[srcp_src]; + dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; + + /* srcp needs src0 and src1 to be the same */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(dst_full, is_rgb, + is_alpha, temp.File, temp.Index); + one_way = 1; + } else { + dst_sub->Src[free_source] = temp; + } + + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; + + /* Shuffle the sources, so we can put the + * presubtract source in the correct place. */ + for(arg = 0; arg < info->NumSrcRegs; arg++) { + /*If this arg does not read from an rgb source, + * do nothing. */ + if (!(rc_source_type_that_arg_reads( + dst_full->RGB.Arg[arg].Source, + dst_full->RGB.Arg[arg].Swizzle) & type)) { + continue; + } + if (dst_full->RGB.Arg[arg].Source == srcp_src) + dst_full->RGB.Arg[arg].Source = free_source; + /* We need to do this just in case register + * is one of the sources already, but in the + * wrong spot. */ + else if(dst_full->RGB.Arg[arg].Source == free_source + && !one_way) { + dst_full->RGB.Arg[arg].Source = srcp_src; + } + } + } + return 1; +} + +/* This function assumes that rgb.Alpha and alpha.RGB are unused */ static int destructive_merge_instructions( struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) { const struct rc_opcode_info * opcode; + assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); assert(alpha->RGB.Opcode == RC_OPCODE_NOP); @@ -288,130 +390,15 @@ static int destructive_merge_instructions( * src1. */ /* Merge the rgb presubtract registers. */ - const struct rc_opcode_info * rgb_info = - rc_get_opcode_info(rgb->RGB.Opcode); if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - unsigned int srcp_src; - unsigned int srcp_regs; - if (rgb->RGB.Src[RC_PAIR_PRESUB_SRC].Used) + if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) { return 0; - srcp_regs = rc_presubtract_src_reg_count( - alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Index); - for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { - unsigned int arg; - int free_source; - unsigned int one_way = 0; - struct rc_pair_instruction_source srcp = - alpha->RGB.Src[srcp_src]; - struct rc_pair_instruction_source temp; - /* 2nd arg of 1 means this is an rgb source. - * 3rd arg of 0 means this is not an alpha source. */ - free_source = rc_pair_alloc_source(rgb, 1, 0, - srcp.File, srcp.Index); - /* If free_source < 0 then there are no free source - * slots. */ - if (free_source < 0) - return 0; - - temp = rgb->RGB.Src[srcp_src]; - rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source]; - /* srcp needs src0 and src1 to be the same */ - if (free_source < srcp_src) { - if (!temp.Used) - continue; - free_source = rc_pair_alloc_source(rgb, 1, 0, - srcp.File, srcp.Index); - one_way = 1; - } else { - rgb->RGB.Src[free_source] = temp; - } - /* If free_source == srcp_src, then the presubtract - * source is already in the correct place. */ - if (free_source == srcp_src) - continue; - /* Shuffle the sources, so we can put the - * presubtract source in the correct place. */ - for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) { - /*If this arg does not read from an rgb source, - * do nothing. */ - if (rc_source_type_that_arg_reads( - rgb->RGB.Arg[arg].Source, - rgb->RGB.Arg[arg].Swizzle, 3) - != RC_PAIR_SOURCE_RGB) { - continue; - } - if (rgb->RGB.Arg[arg].Source == srcp_src) - rgb->RGB.Arg[arg].Source = free_source; - /* We need to do this just in case register - * is one of the sources already, but in the - * wrong spot. */ - else if(rgb->RGB.Arg[arg].Source == free_source - && !one_way) { - rgb->RGB.Arg[arg].Source = srcp_src; - } - } } } - /* Merge the alpha presubtract registers */ if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - unsigned int srcp_src; - unsigned int srcp_regs; - if(rgb->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){ return 0; - - srcp_regs = rc_presubtract_src_reg_count( - alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Index); - for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { - unsigned int arg; - int free_source; - unsigned int one_way = 0; - struct rc_pair_instruction_source srcp = - alpha->Alpha.Src[srcp_src]; - struct rc_pair_instruction_source temp; - /* 2nd arg of 0 means this is not an rgb source. - * 3rd arg of 1 means this is an alpha source. */ - free_source = rc_pair_alloc_source(rgb, 0, 1, - srcp.File, srcp.Index); - /* If free_source < 0 then there are no free source - * slots. */ - if (free_source < 0) - return 0; - - temp = rgb->Alpha.Src[srcp_src]; - rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source]; - /* srcp needs src0 and src1 to be the same. */ - if (free_source < srcp_src) { - if (!temp.Used) - continue; - free_source = rc_pair_alloc_source(rgb, 0, 1, - temp.File, temp.Index); - one_way = 1; - } else { - rgb->Alpha.Src[free_source] = temp; - } - /* If free_source == srcp_src, then the presubtract - * source is already in the correct place. */ - if (free_source == srcp_src) - continue; - /* Shuffle the sources, so we can put the - * presubtract source in the correct place. */ - for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) { - /*If this arg does not read from an alpha - * source, do nothing. */ - if (rc_source_type_that_arg_reads( - rgb->RGB.Arg[arg].Source, - rgb->RGB.Arg[arg].Swizzle, 3) - != RC_PAIR_SOURCE_ALPHA) { - continue; - } - if (rgb->RGB.Arg[arg].Source == srcp_src) - rgb->RGB.Arg[arg].Source = free_source; - else if (rgb->RGB.Arg[arg].Source == free_source - && !one_way) { - rgb->RGB.Arg[arg].Source = srcp_src; - } - } } } @@ -424,6 +411,7 @@ static int destructive_merge_instructions( unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; rc_register_file file = 0; unsigned int index = 0; + int source; if (alpha->Alpha.Arg[arg].Swizzle < 3) { srcrgb = 1; @@ -435,7 +423,7 @@ static int destructive_merge_instructions( index = alpha->Alpha.Src[oldsrc].Index; } - int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); + source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); if (source < 0) return 0; @@ -475,6 +463,12 @@ static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_i { struct rc_pair_instruction backup; + /*Instructions can't write output registers and ALU result at the + * same time. */ + if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) + || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { + return 0; + } memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); if (destructive_merge_instructions(rgb, alpha)) @@ -597,6 +591,7 @@ static void scan_read(void * data, struct rc_instruction * inst, { struct schedule_state * s = data; struct reg_value * v = get_reg_value(s, file, index, chan); + struct reg_value_reader * reader; if (!v) return; @@ -610,7 +605,7 @@ static void scan_read(void * data, struct rc_instruction * inst, DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); - struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); + reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); reader->Reader = s->Current; reader->Next = v->Readers; v->Readers = reader; @@ -630,13 +625,14 @@ static void scan_write(void * data, struct rc_instruction * inst, { struct schedule_state * s = data; struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + struct reg_value * newv; if (!pv) return; DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); - struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); + newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); memset(newv, 0, sizeof(*newv)); newv->Writer = s->Current; @@ -659,12 +655,13 @@ static void schedule_block(struct r300_fragment_program_compiler * c, struct rc_instruction * begin, struct rc_instruction * end) { struct schedule_state s; + unsigned int ip; memset(&s, 0, sizeof(s)); s.C = &c->Base; /* Scan instructions for data dependencies */ - unsigned int ip = 0; + ip = 0; for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); memset(s.Current, 0, sizeof(struct schedule_instruction)); @@ -716,12 +713,14 @@ void rc_pair_schedule(struct radeon_compiler *cc, void *user) struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; struct rc_instruction * inst = c->Base.Program.Instructions.Next; while(inst != &c->Base.Program.Instructions) { + struct rc_instruction * first; + if (is_controlflow(inst)) { inst = inst->Next; continue; } - struct rc_instruction * first = inst; + first = inst; while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) inst = inst->Next; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index ff82584466..c549be5218 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -148,9 +148,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, struct rc_pair_instruction * pair, struct rc_sub_instruction * inst) { + int needrgb, needalpha, istranscendent; + const struct rc_opcode_info * opcode; + int i; + memset(pair, 0, sizeof(struct rc_pair_instruction)); - int needrgb, needalpha, istranscendent; classify_instruction(inst, &needrgb, &needalpha, &istranscendent); if (needrgb) { @@ -167,8 +170,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, pair->Alpha.Saturate = 1; } - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - int i; + opcode = rc_get_opcode_info(inst->Opcode); /* Presubtract handling: * We need to make sure that the values used by the presubtract @@ -330,15 +332,18 @@ void rc_pair_translate(struct radeon_compiler *cc, void *user) for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode; + struct rc_sub_instruction copy; + if (inst->Type != RC_INSTRUCTION_NORMAL) continue; - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) continue; - struct rc_sub_instruction copy = inst->U.I; + copy = inst->U.I; check_opcode_support(c, ©); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 4d3e26f28c..39408845d5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -874,13 +874,15 @@ int r300_transform_trig_simple(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { + unsigned int constants[2]; + unsigned int tempreg; + if (inst->U.I.Opcode != RC_OPCODE_COS && inst->U.I.Opcode != RC_OPCODE_SIN && inst->U.I.Opcode != RC_OPCODE_SCS) return 0; - unsigned int constants[2]; - unsigned int tempreg = rc_find_free_temporary(c); + tempreg = rc_find_free_temporary(c); sincos_constants(c, constants); @@ -918,6 +920,8 @@ int r300_transform_trig_simple(struct radeon_compiler* c, swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), constants); } else { + struct rc_dst_register dst; + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), swizzle_xxxx(inst->U.I.SrcReg[0]), swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), @@ -929,7 +933,7 @@ int r300_transform_trig_simple(struct radeon_compiler* c, swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); - struct rc_dst_register dst = inst->U.I.DstReg; + dst = inst->U.I.DstReg; dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; sin_approx(c, inst, dst, @@ -988,16 +992,16 @@ int radeonTransformTrigScale(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->U.I.Opcode != RC_OPCODE_COS && - inst->U.I.Opcode != RC_OPCODE_SIN && - inst->U.I.Opcode != RC_OPCODE_SCS) - return 0; - static const float RCP_2PI = 0.15915494309189535; unsigned int temp; unsigned int constant; unsigned int constant_swizzle; + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + temp = rc_find_free_temporary(c); constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); @@ -1020,6 +1024,10 @@ int r300_transform_trig_scale_vertex(struct radeon_compiler *c, struct rc_instruction *inst, void *unused) { + static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; + unsigned int temp; + unsigned int constant; + if (inst->U.I.Opcode != RC_OPCODE_COS && inst->U.I.Opcode != RC_OPCODE_SIN && inst->U.I.Opcode != RC_OPCODE_SCS) @@ -1030,10 +1038,6 @@ int r300_transform_trig_scale_vertex(struct radeon_compiler *c, * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI */ - static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; - unsigned int temp; - unsigned int constant; - temp = rc_find_free_temporary(c); constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index c31efdb059..a21fe8d3df 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -206,34 +206,22 @@ void rc_pair_foreach_source_that_rgb_reads( /*return 0 for rgb, 1 for alpha -1 for error. */ -rc_pair_source_type rc_source_type_that_arg_reads( +unsigned int rc_source_type_that_arg_reads( unsigned int source, - unsigned int swizzle, - unsigned int channels) + unsigned int swizzle) { unsigned int chan; unsigned int swz = RC_SWIZZLE_UNUSED; - int isRGB = 0; - int isAlpha = 0; - /* Find a swizzle that is either X,Y,Z,or W. We assume here - * that if one channel swizzles X,Y, or Z, then none of the - * other channels swizzle W, and vice-versa. */ - for(chan = 0; chan < channels; chan++) { + unsigned int ret = RC_PAIR_SOURCE_NONE; + + for(chan = 0; chan < 3; chan++) { swz = GET_SWZ(swizzle, chan); if (swz == RC_SWIZZLE_W) { - isAlpha = 1; + ret |= RC_PAIR_SOURCE_ALPHA; } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) { - isRGB = 1; + ret |= RC_PAIR_SOURCE_RGB; } } - assert(!isRGB || !isAlpha); - - if(!isRGB && !isAlpha) - return RC_PAIR_SOURCE_NONE; - - if (isRGB) - return RC_PAIR_SOURCE_RGB; - /*isAlpha*/ - return RC_PAIR_SOURCE_ALPHA; + return ret; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 01cdb15424..54d44a2098 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -55,6 +55,10 @@ struct radeon_compiler; */ #define RC_PAIR_PRESUB_SRC 3 +#define RC_PAIR_SOURCE_NONE 0x0 +#define RC_PAIR_SOURCE_RGB 0x1 +#define RC_PAIR_SOURCE_ALPHA 0x2 + struct rc_pair_instruction_source { unsigned int Used:1; unsigned int File:3; @@ -93,12 +97,6 @@ struct rc_pair_instruction { typedef void (*rc_pair_foreach_src_fn) (void *, struct rc_pair_instruction_source *); -typedef enum { - RC_PAIR_SOURCE_NONE = 0, - RC_PAIR_SOURCE_RGB, - RC_PAIR_SOURCE_ALPHA -} rc_pair_source_type; - /** * General helper functions for dealing with the paired instruction format. */ @@ -117,10 +115,9 @@ void rc_pair_foreach_source_that_rgb_reads( void * data, rc_pair_foreach_src_fn cb); -rc_pair_source_type rc_source_type_that_arg_reads( +unsigned int rc_source_type_that_arg_reads( unsigned int source, - unsigned int swizzle, - unsigned int channels); + unsigned int swizzle); /*@}*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c index c59161640c..5f67f536f6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c @@ -68,6 +68,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) unsigned are_externals_remapped = 0; struct rc_constant *constants = c->Program.Constants.Constants; struct mark_used_data d; + unsigned new_count; if (!c->Program.Constants.Count) { *out_remap_table = NULL; @@ -97,7 +98,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) * This pass removes unused constants simply by overwriting them by other constants. */ remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); - unsigned new_count = 0; + new_count = 0; for (unsigned i = 0; i < c->Program.Constants.Count; i++) { if (const_used[i]) { @@ -144,6 +145,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) free(const_used); free(inv_remap_table); - if (c->Debug) + if (c->Debug & RC_DBG_LOG) rc_constants_print(&c->Program.Constants); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 31d25f9ab8..60e228be5b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -104,13 +104,14 @@ void rc_rename_regs(struct radeon_compiler *c, void *user) inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * info; + unsigned int old_index, temp_index; + struct rc_dst_register * dst; if(inst->Type != RC_INSTRUCTION_NORMAL) { rc_error(c, "%s only works with normal instructions.", __FUNCTION__); return; } - unsigned int old_index, temp_index; - struct rc_dst_register * dst = &inst->U.I.DstReg; + dst = &inst->U.I.DstReg; info = rc_get_opcode_info(inst->U.I.Opcode); if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) { continue; |