From 3e5f9789d653726d2602de67e996b73a813ebc2e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 13 Nov 2010 17:12:58 -0800 Subject: r300/compiler: Fix instruction scheduling within IF blocks Reads of registers that were not written to within the same block were not being tracked. So in a situation like this: 0: IF 1: ADD t0, t1, t2 2: MOV t2, t1 Instruction 2 didn't know that instruction 1 read from t2, so in some cases instruction 2 was being scheduled before instruction 1. NOTE: This is a candidate for the 7.9 branch. --- .../dri/r300/compiler/radeon_pair_schedule.c | 37 ++++++++++++---------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 553e9dcf7c..f760a9023d 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -126,15 +126,6 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s, return &s->Temporary[index].Values[chan]; } -static struct reg_value * get_reg_value(struct schedule_state * s, - rc_register_file file, unsigned int index, unsigned int chan) -{ - struct reg_value ** pv = get_reg_valuep(s, file, index, chan); - if (!pv) - return 0; - return *pv; -} - static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) { inst->NextReady = *list; @@ -591,13 +582,13 @@ static void scan_read(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan) { struct schedule_state * s = data; - struct reg_value * v = get_reg_value(s, file, index, chan); + struct reg_value ** v = get_reg_valuep(s, file, index, chan); struct reg_value_reader * reader; if (!v) return; - if (v->Writer == s->Current) { + if (*v && (*v)->Writer == s->Current) { /* The instruction reads and 
writes to a register component. * In this case, we only want to increment dependencies by one. */ @@ -608,16 +599,28 @@ static void scan_read(void * data, struct rc_instruction * inst, reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); reader->Reader = s->Current; - reader->Next = v->Readers; - v->Readers = reader; - v->NumReaders++; - - s->Current->NumDependencies++; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. */ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. */ + if ((*v)->Writer) { + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; if (s->Current->NumReadValues >= 12) { rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); } else { - s->Current->ReadValues[s->Current->NumReadValues++] = v; + s->Current->ReadValues[s->Current->NumReadValues++] = *v; } } -- cgit v1.2.3 From ddceededf850c942c1bb8185ec7dc0c203d3bad6 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 29 Sep 2010 23:52:49 -0700 Subject: r300/compiler: Convert RGB to alpha in the scheduler --- .../dri/r300/compiler/radeon_pair_schedule.c | 310 +++++++++++++++++++-- .../drivers/dri/r300/compiler/radeon_program.c | 11 + .../drivers/dri/r300/compiler/radeon_program.h | 15 + .../dri/r300/compiler/radeon_program_print.c | 1 + 4 files changed, 320 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index f760a9023d..cce253f11e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ 
b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -54,6 +54,11 @@ struct schedule_instruction { * this instruction can be scheduled. */ unsigned int NumDependencies:5; + + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; }; @@ -94,6 +99,16 @@ struct register_state { struct reg_value * Values[4]; }; +struct remap_reg { + struct rc_instruciont * Inst; + unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int OldSwizzle:3; + unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int NewSwizzle:3; + unsigned int OnlyTexReads:1; + struct remap_reg * Next; +}; + struct schedule_state { struct radeon_compiler * C; struct schedule_instruction * Current; @@ -516,6 +531,222 @@ static void presub_nop(struct rc_instruction * emitted) { } } } + +static void rgb_to_alpha_remap ( + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + rc_register_file old_file, + rc_swizzle old_swz, + unsigned int new_index) +{ + int new_src_index; + unsigned int i; + struct rc_pair_instruction_source * old_src = + rc_pair_get_src(&inst->U.P, arg); + if (!old_src) { + return; + } + + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + memset(old_src, 0, sizeof(struct rc_pair_instruction_source)); + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, + old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. 
*/ + if (new_src_index < 0) { + assert(0); + return; + } + + arg->Source = new_src_index; +} + +static int can_remap(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static int can_convert_opcode_to_alpha(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DPH: + return 0; + default: + return 1; + } +} + +static void is_rgb_to_alpha_possible( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int chan_count = 0; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data * reader_data = userdata; + + if (!can_remap(inst->U.P.RGB.Opcode) + || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } + + if (!src) + return; + + /* XXX There are some cases where we can still do the conversion if + * a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } + + /* Make sure the source only reads from one component. + * XXX We should allow the source to read from the same component twice. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch(swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + chan_count++; + break; + default: + break; + } + } + if (chan_count > 1) { + reader_data->Abort = 1; + return; + } + + /* Make sure there are enough alpha sources. 
+ * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the subsitution, even if all 3 alpha sources are being used.*/ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } +} + +static int convert_rgb_to_alpha( + struct schedule_state * s, + struct schedule_instruction * sched_inst) +{ + struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info * info = + rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; + + if (sched_inst->GlobalReaders.Abort) + return 0; + + if (!pair_inst->RGB.WriteMask) + return 0; + + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) + || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } + + assert(sched_inst->NumWriteValues == 1); + + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } + + /* We start at the old index, because if we can reuse the same + * register and just change the swizzle then it is more likely we + * will be able to convert all the readers. 
*/ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value ** new_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value ** old_regvalp = + get_reg_valuep(s, + RC_FILE_TEMPORARY, + pair_inst->RGB.DestIndex, + rc_mask_to_swz(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + *old_regvalp = NULL; + new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + break; + } + } + if (new_index < 0) { + return 0; + } + + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = 1; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = swz; + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + + for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(reader.Inst, reader.U.Arg, + RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; +} + /** * Find a good ALU instruction or pair of ALU instruction and emit it. 
* @@ -527,24 +758,16 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor { struct schedule_instruction * sinst; - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - if (s->ReadyFullALU) { - sinst = s->ReadyFullALU; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - sinst = s->ReadyAlpha; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); } else { struct schedule_instruction **prgb; struct schedule_instruction **palpha; - + struct schedule_instruction *prev; +pair: /* Some pairings might fail because they require too * many source slots; try all possible pairings if necessary */ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { @@ -563,10 +786,43 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor goto success; } } - - /* No success in pairing; just take the first RGB instruction */ - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; + prev = NULL; + /* No success in pairing, now try to convert one of the RGB + * instructions to an Alpha so we can pair it with another RGB. + */ + if (s->ReadyRGB && s->ReadyRGB->NextReady) { + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + if ((*prgb)->NumWriteValues == 1) { + struct schedule_instruction * prgb_next; + if (!convert_rgb_to_alpha(s, *prgb)) + goto cont_loop; + prgb_next = (*prgb)->NextReady; + /* Add instruction to the Alpha ready list. 
*/ + (*prgb)->NextReady = s->ReadyAlpha; + s->ReadyAlpha = *prgb; + /* Remove instruction from the RGB ready list.*/ + if (prev) + prev->NextReady = prgb_next; + else + s->ReadyRGB = prgb_next; + goto pair; + } +cont_loop: + prev = *prgb; + } + } + /* Still no success in pairing, just take the first RGB + * or alpha instruction. */ + if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else if (s->ReadyAlpha) { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } else { + /*XXX Something real bad has happened. */ + assert(0); + } rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); @@ -655,6 +911,16 @@ static void scan_write(void * data, struct rc_instruction * inst, } } +static void is_rgb_to_alpha_possible_normal( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = userdata; + reader_data->Abort = 1; + +} + static void schedule_block(struct r300_fragment_program_compiler * c, struct rc_instruction * begin, struct rc_instruction * end) { @@ -686,6 +952,11 @@ static void schedule_block(struct r300_fragment_program_compiler * c, if (!s.Current->NumDependencies) instruction_ready(&s, s.Current); + + /* Get global readers for possible RGB->Alpha conversion. 
*/ + rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); } /* Temporarily unlink all instructions */ @@ -714,8 +985,13 @@ static int is_controlflow(struct rc_instruction * inst) void rc_pair_schedule(struct radeon_compiler *cc, void *user) { + struct schedule_state s; + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; struct rc_instruction * inst = c->Base.Program.Instructions.Next; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; while(inst != &c->Base.Program.Instructions) { struct rc_instruction * first; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index 24b685fbeb..14dade9be7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -182,3 +182,14 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c) return ip; } + +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch(mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + default: return RC_SWIZZLE_UNUSED; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index f0a77d7b53..a4d50d3663 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -191,6 +191,20 @@ static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) return ret; } +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +static inline rc_swizzle rc_mask_to_swz(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + default: assert(0); + } + 
return RC_SWIZZLE_UNUSED; +} struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); static inline void reset_srcreg(struct rc_src_register* reg) @@ -233,4 +247,5 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c); void rc_print_program(const struct rc_program *prog); +rc_swizzle rc_mask_to_swizzle(unsigned int mask); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index 618ab5a099..ae13f6742f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -129,6 +129,7 @@ static char rc_swizzle_char(unsigned int swz) case RC_SWIZZLE_HALF: return 'H'; case RC_SWIZZLE_UNUSED: return '_'; } + fprintf(stderr, "bad swz: %u\n", swz); return '?'; } -- cgit v1.2.3 From bbe49bc585c4fed46f55d184b463d13bddd97f1b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 8 Nov 2010 18:49:44 -0800 Subject: r300/compiler: Use presubtract operations as much as possible Previously, presubtract operations were only being used by instructions with fewer than three source registers. 
--- .../dri/r300/compiler/radeon_compiler_util.c | 122 +++++++++++++++++++++ .../dri/r300/compiler/radeon_compiler_util.h | 15 +++ .../drivers/dri/r300/compiler/radeon_dataflow.c | 16 ++- .../drivers/dri/r300/compiler/radeon_optimize.c | 36 +++--- .../dri/r300/compiler/radeon_pair_schedule.c | 17 +-- .../dri/r300/compiler/radeon_program_constants.h | 5 + .../dri/r300/compiler/radeon_program_pair.c | 52 ++++----- .../dri/r300/compiler/radeon_program_pair.h | 8 -- 8 files changed, 204 insertions(+), 67 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index 97f4c75849..2b8d284ce9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -31,6 +31,8 @@ #include "radeon_compiler_util.h" +#include "radeon_compiler.h" +#include "radeon_dataflow.h" /** */ unsigned int rc_swizzle_to_writemask(unsigned int swz) @@ -59,3 +61,123 @@ unsigned int rc_src_reads_dst_mask( } return dst_mask & rc_swizzle_to_writemask(src_swz); } + +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; + + for(chan = 0; chan < channels; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; +} + +unsigned int rc_source_type_mask(unsigned int mask) +{ + unsigned int ret = RC_SOURCE_NONE; + + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; + + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; + + return ret; +} + +struct can_use_presub_data { + struct rc_src_register RemoveSrcs[3]; + unsigned int RGBCount; + unsigned int AlphaCount; +}; + +static void 
can_use_presub_read_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct can_use_presub_data * d = userdata; + unsigned int src_type = rc_source_type_mask(mask); + unsigned int i; + + if (file == RC_FILE_NONE) + return; + + for(i = 0; i < 3; i++) { + if (d->RemoveSrcs[i].File == file + && d->RemoveSrcs[i].Index == index) { + src_type &= + ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4); + } + } + + if (src_type & RC_SOURCE_RGB) + d->RGBCount++; + + if (src_type & RC_SOURCE_ALPHA) + d->AlphaCount++; +} + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1) +{ + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int presub_src_type = rc_source_type_mask(presub_writemask); + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + + if (presub_op == RC_PRESUB_NONE) { + return 1; + } + + if (info->HasTexture) { + return 0; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. 
+ * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } + + memset(&d, 0, sizeof(d)); + d.RemoveSrcs[0] = replace_reg; + d.RemoveSrcs[1] = presub_src0; + d.RemoveSrcs[2] = presub_src1; + + rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d); + + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + + if ((presub_src_type & RC_SOURCE_RGB) + && d.RGBCount + num_presub_srcs > 3) { + return 0; + } + + if ((presub_src_type & RC_SOURCE_ALPHA) + && d.AlphaCount + num_presub_srcs > 3) { + return 0; + } + + return 1; +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index 1a14e7cb0e..e50dfbd4fb 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -3,6 +3,9 @@ #ifndef RADEON_PROGRAM_UTIL_H #define RADEON_PROGRAM_UTIL_H +struct rc_instruction; +struct rc_src_register; + unsigned int rc_swizzle_to_writemask(unsigned int swz); unsigned int rc_src_reads_dst_mask( @@ -13,4 +16,16 @@ unsigned int rc_src_reads_dst_mask( unsigned int dst_idx, unsigned int dst_mask); +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels); + +unsigned int rc_source_type_mask(unsigned int mask); + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1); + #endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 600be8b886..9df07edf2b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -139,9 +139,17 @@ static void pair_sub_for_all_args( const 
struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); for(i = 0; i < info->NumSrcRegs; i++) { - unsigned int src_type = rc_source_type_that_arg_reads( - sub->Arg[i].Source, sub->Arg[i].Swizzle); - if (src_type == RC_PAIR_SOURCE_NONE) + unsigned int src_type; + unsigned int channels = 0; + if (&fullinst->U.P.RGB == sub) + channels = 3; + else if (&fullinst->U.P.Alpha == sub) + channels = 1; + + assert(channels > 0); + src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels); + + if (src_type == RC_SOURCE_NONE) continue; if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { @@ -149,7 +157,7 @@ static void pair_sub_for_all_args( unsigned int presub_src_count; struct rc_pair_instruction_source * src_array; unsigned int j; - if (src_type & RC_PAIR_SOURCE_RGB) { + if (src_type & RC_SOURCE_RGB) { presub_type = fullinst-> U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; src_array = fullinst->U.P.RGB.Src; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 2eb548474f..27b10ffbd6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -71,12 +71,13 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, { rc_register_file file = src->File; struct rc_reader_data * reader_data = data; - const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - /* It is possible to do copy propigation in this situation, - * just not right now, see peephole_add_presub_inv() */ - if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE && - (info->NumSrcRegs > 2 || info->HasTexture)) { + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + *src, + reader_data->Writer->U.I.PreSub.SrcReg[0], + reader_data->Writer->U.I.PreSub.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -424,24 +425,13 @@ static void presub_scan_read( struct 
rc_src_register * src) { struct rc_reader_data * reader_data = data; - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - /* XXX: There are some situations where instructions - * with more than 2 src registers can use the - * presubtract select, but to keep things simple we - * will disable presubtract on these instructions for - * now. */ - if (info->NumSrcRegs > 2 || info->HasTexture) { - reader_data->Abort = 1; - return; - } + rc_presubtract_op * presub_opcode = reader_data->CbData; - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. - * XXX For now we will limit instructions to only one presubtract - * value.*/ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + *src, + reader_data->Writer->U.I.SrcReg[0], + reader_data->Writer->U.I.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -455,7 +445,9 @@ static int presub_helper( { struct rc_reader_data reader_data; unsigned int i; + rc_presubtract_op cb_op = presub_opcode; + reader_data.CbData = &cb_op; rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, is_src_clobbered_scan_write); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index cce253f11e..cbb5ef6237 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -30,6 +30,7 @@ #include #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" @@ -301,12 +302,12 @@ static int merge_presub_sources( assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); switch(type) { - case RC_PAIR_SOURCE_RGB: + case RC_SOURCE_RGB: is_rgb = 1; is_alpha = 0; dst_sub = &dst_full->RGB; break; - case RC_PAIR_SOURCE_ALPHA: + case RC_SOURCE_ALPHA: is_rgb 
= 0; is_alpha = 1; dst_sub = &dst_full->Alpha; @@ -347,6 +348,8 @@ static int merge_presub_sources( continue; free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; one_way = 1; } else { dst_sub->Src[free_source] = temp; @@ -362,11 +365,11 @@ static int merge_presub_sources( for(arg = 0; arg < info->NumSrcRegs; arg++) { /*If this arg does not read from an rgb source, * do nothing. */ - if (!(rc_source_type_that_arg_reads( - dst_full->RGB.Arg[arg].Source, - dst_full->RGB.Arg[arg].Swizzle) & type)) { + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle, + 3) & type)) { continue; } + if (dst_full->RGB.Arg[arg].Source == srcp_src) dst_full->RGB.Arg[arg].Source = free_source; /* We need to do this just in case register @@ -398,13 +401,13 @@ static int destructive_merge_instructions( /* Merge the rgb presubtract registers. */ if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { return 0; } } /* Merge the alpha presubtract registers */ if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){ + if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ return 0; } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 9dcd44c522..45f79ece5b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -181,4 +181,9 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ return 0; } } + +#define RC_SOURCE_NONE 0x0 +#define RC_SOURCE_RGB 0x1 +#define RC_SOURCE_ALPHA 0x2 + #endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c 
b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 582d73b61f..5905d26e52 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -27,6 +27,8 @@ #include "radeon_program_pair.h" +#include "radeon_compiler_util.h" + #include /** @@ -205,37 +207,35 @@ void rc_pair_foreach_source_that_rgb_reads( } } -/*return 0 for rgb, 1 for alpha -1 for error. */ - -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle) -{ - unsigned int chan; - unsigned int swz = RC_SWIZZLE_UNUSED; - unsigned int ret = RC_PAIR_SOURCE_NONE; - - for(chan = 0; chan < 3; chan++) { - swz = GET_SWZ(swizzle, chan); - if (swz == RC_SWIZZLE_W) { - ret |= RC_PAIR_SOURCE_ALPHA; - } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y - || swz == RC_SWIZZLE_Z) { - ret |= RC_PAIR_SOURCE_RGB; - } - } - return ret; -} - struct rc_pair_instruction_source * rc_pair_get_src( struct rc_pair_instruction * pair_inst, struct rc_pair_instruction_arg * arg) { - unsigned int type = rc_source_type_that_arg_reads(arg->Source, - arg->Swizzle); - if (type & RC_PAIR_SOURCE_RGB) { + unsigned int i, type; + unsigned int channels = 0; + + for(i = 0; i < 3; i++) { + if (arg == pair_inst->RGB.Arg + i) { + channels = 3; + break; + } + } + + if (channels == 0) { + for (i = 0; i < 3; i++) { + if (arg == pair_inst->Alpha.Arg + i) { + channels = 1; + break; + } + } + } + + assert(channels > 0); + type = rc_source_type_swz(arg->Swizzle, channels); + + if (type & RC_SOURCE_RGB) { return &pair_inst->RGB.Src[arg->Source]; - } else if (type & RC_PAIR_SOURCE_ALPHA) { + } else if (type & RC_SOURCE_ALPHA) { return &pair_inst->Alpha.Src[arg->Source]; } else { return NULL; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 54ca56762b..ccf7a0070c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ 
b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -55,10 +55,6 @@ struct radeon_compiler; */ #define RC_PAIR_PRESUB_SRC 3 -#define RC_PAIR_SOURCE_NONE 0x0 -#define RC_PAIR_SOURCE_RGB 0x1 -#define RC_PAIR_SOURCE_ALPHA 0x2 - struct rc_pair_instruction_source { unsigned int Used:1; unsigned int File:3; @@ -115,10 +111,6 @@ void rc_pair_foreach_source_that_rgb_reads( void * data, rc_pair_foreach_src_fn cb); -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle); - struct rc_pair_instruction_source * rc_pair_get_src( struct rc_pair_instruction * pair_inst, struct rc_pair_instruction_arg * arg); -- cgit v1.2.3 From 7c294462324fd3dc1951fc003290b926fbc0b07e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 25 Nov 2010 01:25:03 +0100 Subject: r300/compiler: remove duplicate function rc_mask_to_swz --- src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c | 2 +- src/mesa/drivers/dri/r300/compiler/radeon_program.c | 11 ----------- src/mesa/drivers/dri/r300/compiler/radeon_program.h | 3 +-- 3 files changed, 2 insertions(+), 14 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index cbb5ef6237..9beb5d6357 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -701,7 +701,7 @@ static int convert_rgb_to_alpha( get_reg_valuep(s, RC_FILE_TEMPORARY, pair_inst->RGB.DestIndex, - rc_mask_to_swz(old_mask)); + rc_mask_to_swizzle(old_mask)); new_index = i; *new_regvalp = *old_regvalp; *old_regvalp = NULL; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index d7bedc5729..707882882a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -244,14 
+244,3 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c) return ip; } - -rc_swizzle rc_mask_to_swizzle(unsigned int mask) -{ - switch(mask) { - case RC_MASK_X: return RC_SWIZZLE_X; - case RC_MASK_Y: return RC_SWIZZLE_Y; - case RC_MASK_Z: return RC_SWIZZLE_Z; - case RC_MASK_W: return RC_SWIZZLE_W; - default: return RC_SWIZZLE_UNUSED; - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index be078b4f4f..913815b24d 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -194,14 +194,13 @@ static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) /** * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W */ -static inline rc_swizzle rc_mask_to_swz(unsigned int mask) +static inline rc_swizzle rc_mask_to_swizzle(unsigned int mask) { switch (mask) { case RC_MASK_X: return RC_SWIZZLE_X; case RC_MASK_Y: return RC_SWIZZLE_Y; case RC_MASK_Z: return RC_SWIZZLE_Z; case RC_MASK_W: return RC_SWIZZLE_W; - default: assert(0); } return RC_SWIZZLE_UNUSED; } -- cgit v1.2.3