From 412803b5cd64a5771835366b38b398a2ed8732d8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 11 Nov 2010 01:28:44 -0800 Subject: r300/compiler: Make sure presubtract sources use supported swizzles NOTE: This is a candidate for the 7.9 branch. --- .../drivers/dri/r300/compiler/r300_fragprog_swizzle.c | 6 ++++-- src/mesa/drivers/dri/r300/compiler/radeon_optimize.c | 15 +++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 2d28b06539..05d3da8a10 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -94,6 +94,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) */ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { + const struct swizzle_data* sd; unsigned int relevant; int j; @@ -127,7 +128,8 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return 0; - if (!lookup_native_swizzle(reg.Swizzle)) + sd = lookup_native_swizzle(reg.Swizzle); + if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) return 0; return 1; @@ -201,7 +203,7 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) { const struct swizzle_data* sd = lookup_native_swizzle(swizzle); - if (!sd) { + if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); return 0; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 15b9c5e7dc..96badb0a23 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -505,7 +505,9 @@ static void presub_replace_add( inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; } -static int is_presub_candidate(struct rc_instruction * inst) +static int is_presub_candidate( + struct radeon_compiler * c, + struct rc_instruction * inst) { const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); unsigned int i; @@ -514,7 +516,12 @@ static int is_presub_candidate(struct rc_instruction * inst) return 0; for(i = 0; i < info->NumSrcRegs; i++) { - if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg)) + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; + + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) return 0; } return 1; @@ -528,7 +535,7 @@ static int peephole_add_presub_add( struct rc_src_register * src1 = NULL; unsigned int i; - if (!is_presub_candidate(inst_add)) + if (!is_presub_candidate(c, inst_add)) return 0; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) @@ -592,7 +599,7 @@ static int peephole_add_presub_inv( { unsigned int i, swz, mask; - if (!is_presub_candidate(inst_add)) + if (!is_presub_candidate(c, inst_add)) return 0; mask = inst_add->U.I.DstReg.WriteMask; -- cgit v1.2.3 From e2301b45c288cdbd4e763dfbc698d709045f2df5 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 13 Nov 2010 16:57:06 -0800 Subject: r300/compiler: Fix register allocator's handling of loops NOTE: This is a candidate for the 7.9 branch. --- .../dri/r300/compiler/radeon_pair_regalloc.c | 31 +++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index 91524f5ec6..3f880c88fa 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -66,10 +66,13 @@ struct regalloc_state { struct hardware_register * HwTemporary; unsigned int NumHwTemporaries; /** - * If an instruction is inside of a loop, end_loop will be the - * IP of the ENDLOOP instruction, otherwise end_loop will be 0 + * If an instruction is inside of a loop, EndLoop will be the + * IP of the ENDLOOP instruction, and BeginLoop will be the IP + * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop + * will be -1. */ - int end_loop; + int EndLoop; + int BeginLoop; }; static void print_live_intervals(struct live_intervals * src) @@ -180,11 +183,13 @@ static void scan_callback(void * data, struct rc_instruction * inst, reg->Used = 1; if (file == RC_FILE_INPUT) reg->Live.Start = -1; + else if (s->BeginLoop >= 0) + reg->Live.Start = s->BeginLoop; else reg->Live.Start = inst->IP; reg->Live.End = inst->IP; - } else if (s->end_loop) - reg->Live.End = s->end_loop; + } else if (s->EndLoop >= 0) + reg->Live.End = s->EndLoop; else if (inst->IP > reg->Live.End) reg->Live.End = inst->IP; } @@ -195,6 +200,8 @@ static void compute_live_intervals(struct radeon_compiler *c, memset(s, 0, sizeof(*s)); s->C = c; s->NumHwTemporaries = c->max_temp_regs; + s->BeginLoop = -1; + s->EndLoop = -1; s->HwTemporary = memory_pool_malloc(&c->Pool, s->NumHwTemporaries * sizeof(struct hardware_register)); @@ -207,8 +214,10 @@ static void compute_live_intervals(struct radeon_compiler *c, inst = inst->Next) { /* For all instructions inside of a loop, the ENDLOOP - * instruction is used as the end of the live interval. */ - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) { + * instruction is used as the end of the live interval and + * the BGNLOOP instruction is used as the beginning. */ + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { + s->BeginLoop = inst->IP; int loops = 1; struct rc_instruction * tmp; for(tmp = inst->Next; @@ -219,15 +228,17 @@ static void compute_live_intervals(struct radeon_compiler *c, } else if (tmp->U.I.Opcode == RC_OPCODE_ENDLOOP) { if(!--loops) { - s->end_loop = tmp->IP; + s->EndLoop = tmp->IP; break; } } } } - if (inst->IP == s->end_loop) - s->end_loop = 0; + if (inst->IP == s->EndLoop) { + s->EndLoop = -1; + s->BeginLoop = -1; + } rc_for_all_reads_mask(inst, scan_callback, s); rc_for_all_writes_mask(inst, scan_callback, s); -- cgit v1.2.3 From 3e5f9789d653726d2602de67e996b73a813ebc2e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 13 Nov 2010 17:12:58 -0800 Subject: r300/compiler: Fix instruction scheduling within IF blocks Reads of registers that where not written to within the same block were not being tracked. So in a situations like this: 0: IF 1: ADD t0, t1, t2 2: MOV t2, t1 Instruction 2 didn't know that instruction 1 read from t2, so in some cases instruction 2 was being scheduled before instruction 1. NOTE: This is a candidate for the 7.9 branch. --- .../dri/r300/compiler/radeon_pair_schedule.c | 37 ++++++++++++---------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 553e9dcf7c..f760a9023d 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -126,15 +126,6 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s, return &s->Temporary[index].Values[chan]; } -static struct reg_value * get_reg_value(struct schedule_state * s, - rc_register_file file, unsigned int index, unsigned int chan) -{ - struct reg_value ** pv = get_reg_valuep(s, file, index, chan); - if (!pv) - return 0; - return *pv; -} - static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) { inst->NextReady = *list; @@ -591,13 +582,13 @@ static void scan_read(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan) { struct schedule_state * s = data; - struct reg_value * v = get_reg_value(s, file, index, chan); + struct reg_value ** v = get_reg_valuep(s, file, index, chan); struct reg_value_reader * reader; if (!v) return; - if (v->Writer == s->Current) { + if (*v && (*v)->Writer == s->Current) { /* The instruction reads and writes to a register component. * In this case, we only want to increment dependencies by one. */ @@ -608,16 +599,28 @@ static void scan_read(void * data, struct rc_instruction * inst, reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); reader->Reader = s->Current; - reader->Next = v->Readers; - v->Readers = reader; - v->NumReaders++; - - s->Current->NumDependencies++; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. */ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. */ + if ((*v)->Writer) { + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; if (s->Current->NumReadValues >= 12) { rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); } else { - s->Current->ReadValues[s->Current->NumReadValues++] = v; + s->Current->ReadValues[s->Current->NumReadValues++] = *v; } } -- cgit v1.2.3 From d668659003d0164f0d9c805f4127ff8eb64f0624 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 11 Nov 2010 01:01:13 -0800 Subject: r300/compiler: Use zero as the register index for unused sources This fixes an invalid "Too many hardware temporaries used" error in the case where a source reads from a temporary register with an index greater than max_temp_regs and then the source is marked as unused before the register allocation pass. NOTE: This is a candidate for the 7.9 branch. --- src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c | 3 +++ src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 8be32ea91f..1db8678e89 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -76,6 +76,9 @@ static void use_temporary(struct r300_fragment_program_code *code, unsigned int static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) { + if (!src.Used) + return 0; + if (src.File == RC_FILE_CONSTANT) { return src.Index | (1 << 5); } else if (src.File == RC_FILE_TEMPORARY) { diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 6f101c68eb..0311e7cf9a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -200,6 +200,9 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) { + if (!src.Used) + return 0; + if (src.File == RC_FILE_CONSTANT) { return src.Index | 0x100; } else if (src.File == RC_FILE_TEMPORARY) { -- cgit v1.2.3 From 23f577dbd491bd045c47c6378bd23748255eb045 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 11 Nov 2010 01:13:01 -0800 Subject: r300/compiler: Ignore alpha dest register when replicating the result When the result of the alpha instruction is being replicated to the RGB destination register, we do not need to use alpha's destination register. This fixes an invalid "Too many hardware temporaries used" error in the case where a transcendent operation writes to a temporary register greater than max_temp_regs. NOTE: This is a candidate for the 7.9 branch. --- src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index c549be5218..fc05366f50 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -280,9 +280,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, pair->RGB.DestIndex = inst->DstReg.Index; pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; } + if (needalpha) { - pair->Alpha.DestIndex = inst->DstReg.Index; pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + if (pair->Alpha.WriteMask) { + pair->Alpha.DestIndex = inst->DstReg.Index; + } } } -- cgit v1.2.3 From 96f9580160bf769fbdcd005b48c7bf6e92d453f7 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 29 Oct 2010 22:27:04 -0700 Subject: r300/compiler: Add rc_get_readers() --- .../drivers/dri/r300/compiler/radeon_dataflow.c | 306 ++++++++++++++------- .../drivers/dri/r300/compiler/radeon_dataflow.h | 18 +- .../drivers/dri/r300/compiler/radeon_optimize.c | 11 +- .../dri/r300/compiler/radeon_program_pair.c | 16 ++ .../dri/r300/compiler/radeon_program_pair.h | 4 + 5 files changed, 246 insertions(+), 109 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index fd94194dc3..d757b1715c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -139,7 +139,38 @@ static void pair_sub_for_all_args( const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); for(i = 0; i < info->NumSrcRegs; i++) { - cb(userdata, fullinst, &sub->Arg[i]); + unsigned int src_type = rc_source_type_that_arg_reads( + sub->Arg[i].Source, sub->Arg[i].Swizzle); + if (src_type == RC_PAIR_SOURCE_NONE) + continue; + + if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { + unsigned int presub_type; + unsigned int presub_src_count; + struct rc_pair_instruction_source * src_array; + unsigned int j; + if (src_type & RC_PAIR_SOURCE_RGB) { + presub_type = fullinst-> + U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.RGB.Src; + } else { + presub_type = fullinst-> + U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.Alpha.Src; + } + presub_src_count + = rc_presubtract_src_reg_count(presub_type); + for(j = 0; j < presub_src_count; j++) { + cb(userdata, fullinst, &sub->Arg[i], + &src_array[j]); + } + } else { + struct rc_pair_instruction_source * src = + rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); + if (src) { + cb(userdata, fullinst, &sub->Arg[i], src); + } + } } } @@ -430,11 +461,20 @@ static rc_opcode get_flow_control_inst(struct rc_instruction * inst) } +union get_readers_read_cb { + rc_read_src_fn I; + rc_pair_read_arg_fn P; +}; + struct get_readers_callback_data { struct radeon_compiler * C; struct rc_reader_data * ReaderData; - rc_read_src_fn ReadCB; + rc_read_src_fn ReadNormalCB; + rc_pair_read_arg_fn ReadPairCB; rc_read_write_mask_fn WriteCB; + rc_register_file DstFile; + unsigned int DstIndex; + unsigned int DstMask; unsigned int AliveWriteMask; }; @@ -443,7 +483,7 @@ static void add_reader( struct rc_reader_data * data, struct rc_instruction * inst, unsigned int mask, - struct rc_src_register * src) + void * arg_or_src) { struct rc_reader * new; memory_pool_array_reserve(pool, struct rc_reader, data->Readers, @@ -451,7 +491,74 @@ static void add_reader( new = &data->Readers[data->ReaderCount++]; new->Inst = inst; new->WriteMask = mask; - new->Src = src; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + new->U.Src = arg_or_src; + } else { + new->U.Arg = arg_or_src; + } +} + +static unsigned int get_readers_read_callback( + struct get_readers_callback_data * cb_data, + unsigned int has_rel_addr, + rc_register_file file, + unsigned int index, + unsigned int swizzle) +{ + unsigned int shared_mask, read_mask; + + if (has_rel_addr) { + cb_data->ReaderData->Abort = 1; + return RC_MASK_NONE; + } + + shared_mask = rc_src_reads_dst_mask(file, index, swizzle, + cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); + + if (shared_mask == RC_MASK_NONE) + return shared_mask; + + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ + + if (cb_data->ReaderData->AbortOnRead) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + read_mask = rc_swizzle_to_writemask(swizzle); + /* XXX The behavior in this case should be configurable. */ + if ((read_mask & cb_data->AliveWriteMask) != read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + return shared_mask; +} + +static void get_readers_pair_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int shared_mask; + struct get_readers_callback_data * d = userdata; + + shared_mask = get_readers_read_callback(d, + 0 /*Pair Instructions don't use RelAddr*/, + src->File, src->Index, arg->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + + if (d->ReadPairCB) + d->ReadPairCB(d->ReaderData, inst, arg, src); + + if (d->ReaderData->Abort) + return; + + add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, arg); } /** @@ -464,37 +571,18 @@ static void get_readers_normal_read_callback( struct rc_src_register * src) { struct get_readers_callback_data * d = userdata; - unsigned int read_mask; unsigned int shared_mask; - if (src->RelAddr) - d->ReaderData->Abort = 1; - - shared_mask = rc_src_reads_dst_mask(src->File, src->Index, - src->Swizzle, - d->ReaderData->Writer->U.I.DstReg.File, - d->ReaderData->Writer->U.I.DstReg.Index, - d->AliveWriteMask); + shared_mask = get_readers_read_callback(d, + src->RelAddr, src->File, src->Index, src->Swizzle); if (shared_mask == RC_MASK_NONE) return; + /* The callback function could potentially clear d->ReaderData->Abort, + * so we need to call it before we return. */ + if (d->ReadNormalCB) + d->ReadNormalCB(d->ReaderData, inst, src); - /* If we make it this far, it means that this source reads from the - * same register written to by d->ReaderData->Writer. */ - - if (d->ReaderData->AbortOnRead) { - d->ReaderData->Abort = 1; - return; - } - - read_mask = rc_swizzle_to_writemask(src->Swizzle); - /* XXX The behavior in this case should be configurable. */ - if ((read_mask & d->AliveWriteMask) != read_mask) { - d->ReaderData->Abort = 1; - return; - } - - d->ReadCB(d->ReaderData, inst, src); if (d->ReaderData->Abort) return; @@ -515,10 +603,8 @@ static void get_readers_write_callback( { struct get_readers_callback_data * d = userdata; - if (index == d->ReaderData->Writer->U.I.DstReg.Index - && file == d->ReaderData->Writer->U.I.DstReg.File) { - unsigned int shared_mask = mask - & d->ReaderData->Writer->U.I.DstReg.WriteMask; + if (index == d->DstIndex && file == d->DstFile) { + unsigned int shared_mask = mask & d->DstMask; if (d->ReaderData->InElse) { if (shared_mask & d->AliveWriteMask) { /* We set AbortOnRead here because the @@ -537,7 +623,90 @@ static void get_readers_write_callback( } } - d->WriteCB(d->ReaderData, inst, file, index, mask); + if(d->WriteCB) + d->WriteCB(d->ReaderData, inst, file, index, mask); +} + +static void get_readers_for_single_write( + void * userdata, + struct rc_instruction * writer, + rc_register_file dst_file, + unsigned int dst_index, + unsigned int dst_mask) +{ + struct rc_instruction * tmp; + unsigned int branch_depth = 0; + struct get_readers_callback_data * d = userdata; + + d->ReaderData->Writer = writer; + d->ReaderData->AbortOnRead = 0; + d->ReaderData->InElse = 0; + d->DstFile = dst_file; + d->DstIndex = dst_index; + d->DstMask = dst_mask; + d->AliveWriteMask = dst_mask; + + if (!dst_mask) + return; + + for(tmp = writer->Next; tmp != &d->C->Program.Instructions; + tmp = tmp->Next){ + rc_opcode opcode = get_flow_control_inst(tmp); + switch(opcode) { + case RC_OPCODE_BGNLOOP: + /* XXX We can do better when we see a BGNLOOP if we + * add a flag called AbortOnWrite to struct + * rc_reader_data and leave it set until the next + * ENDLOOP. */ + case RC_OPCODE_ENDLOOP: + /* XXX We can do better when we see an ENDLOOP by + * searching backwards from writer and looking for + * readers of writer's destination index. If we find a + * reader before we get to the BGNLOOP, we must abort + * unless there is another writer between that reader + * and the BGNLOOP. */ + d->ReaderData->Abort = 1; + return; + case RC_OPCODE_IF: + /* XXX We can do better here, but this will have to + * do until this dataflow analysis is more mature. */ + d->ReaderData->Abort = 1; + branch_depth++; + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) + d->ReaderData->InElse = 1; + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + d->ReaderData->AbortOnRead = 1; + d->ReaderData->InElse = 0; + } + else { + branch_depth--; + } + break; + default: + break; + } + + if (!d->ReaderData->InElse) { + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, + get_readers_normal_read_callback, d); + } else { + rc_pair_for_all_reads_arg(tmp, + get_readers_pair_read_callback, d); + } + } + rc_for_all_writes_mask(tmp, get_readers_write_callback, d); + + if (d->ReaderData->Abort) + return; + + if (!d->AliveWriteMask) + return; + } } /** @@ -578,83 +747,26 @@ static void get_readers_write_callback( * @param write_cb This function will be called for every instruction after * writer. */ -void rc_get_readers_normal( +void rc_get_readers( struct radeon_compiler * c, struct rc_instruction * writer, struct rc_reader_data * data, - rc_read_src_fn read_cb, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb) { - struct rc_instruction * tmp; struct get_readers_callback_data d; - unsigned int branch_depth = 0; - data->Writer = writer; data->Abort = 0; - data->AbortOnRead = 0; - data->InElse = 0; data->ReaderCount = 0; data->ReadersReserved = 0; data->Readers = NULL; d.C = c; - d.AliveWriteMask = writer->U.I.DstReg.WriteMask; d.ReaderData = data; - d.ReadCB = read_cb; + d.ReadNormalCB = read_normal_cb; + d.ReadPairCB = read_pair_cb; d.WriteCB = write_cb; - if (!writer->U.I.DstReg.WriteMask) - return; - - for(tmp = writer->Next; tmp != &c->Program.Instructions; - tmp = tmp->Next){ - rc_opcode opcode = get_flow_control_inst(tmp); - switch(opcode) { - case RC_OPCODE_BGNLOOP: - /* XXX We can do better when we see a BGNLOOP if we - * add a flag called AbortOnWrite to struct - * rc_reader_data and leave it set until the next - * ENDLOOP. */ - case RC_OPCODE_ENDLOOP: - /* XXX We can do better when we see an ENDLOOP by - * searching backwards from writer and looking for - * readers of writer's destination index. If we find a - * reader before we get to the BGNLOOP, we must abort - * unless there is another writer between that reader - * and the BGNLOOP. */ - data->Abort = 1; - return; - case RC_OPCODE_IF: - /* XXX We can do better here, but this will have to - * do until this dataflow analysis is more mature. */ - data->Abort = 1; - branch_depth++; - break; - case RC_OPCODE_ELSE: - if (branch_depth == 0) - data->InElse = 1; - break; - case RC_OPCODE_ENDIF: - if (branch_depth == 0) { - data->AbortOnRead = 1; - data->InElse = 0; - } - else { - branch_depth--; - } - break; - default: - break; - } - - if (!data->InElse) - rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d); - rc_for_all_writes_mask(tmp, get_readers_write_callback, &d); - - if (data->Abort) - return; - - if (!d.AliveWriteMask) - return; - } + rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 7de6b98f76..ef971c5b23 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -36,6 +36,7 @@ struct rc_instruction; struct rc_swizzle_caps; struct rc_src_register; struct rc_pair_instruction_arg; +struct rc_pair_instruction_source; struct rc_compiler; @@ -59,7 +60,8 @@ void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, void * userdata); typedef void (*rc_pair_read_arg_fn)(void * userdata, - struct rc_instruction * inst, struct rc_pair_instruction_arg * arg); + struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src); void rc_pair_for_all_reads_arg(struct rc_instruction * inst, rc_pair_read_arg_fn cb, void * userdata); @@ -71,7 +73,10 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v struct rc_reader { struct rc_instruction * Inst; unsigned int WriteMask; - struct rc_src_register * Src; + union { + struct rc_src_register * Src; + struct rc_pair_instruction_arg * Arg; + } U; }; struct rc_reader_data { @@ -87,14 +92,13 @@ struct rc_reader_data { void * CbData; }; -void rc_get_readers_normal( +void rc_get_readers( struct radeon_compiler * c, - struct rc_instruction * inst, + struct rc_instruction * writer, struct rc_reader_data * data, - /*XXX: These should be their own function types. */ - rc_read_src_fn read_cb, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb); - /** * Compiler passes based on dataflow analysis. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 96badb0a23..0f707d8c52 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -149,8 +149,9 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i return; /* Get a list of all the readers of this MOV instruction. */ - rc_get_readers_normal(c, inst_mov, &reader_data, - copy_propagate_scan_read, is_src_clobbered_scan_write); + rc_get_readers(c, inst_mov, &reader_data, + copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); if (reader_data.Abort || reader_data.ReaderCount == 0) return; @@ -158,7 +159,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i /* Propagate the MOV instruction. */ for (i = 0; i < reader_data.ReaderCount; i++) { struct rc_instruction * inst = reader_data.Readers[i].Inst; - *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, inst_mov->U.I.SrcReg[0]); + *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]); if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) inst->U.I.PreSub = inst_mov->U.I.PreSub; @@ -455,7 +456,7 @@ static int presub_helper( struct rc_reader_data reader_data; unsigned int i; - rc_get_readers_normal(c, inst_add, &reader_data, presub_scan_read, + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, is_src_clobbered_scan_write); if (reader_data.Abort || reader_data.ReaderCount == 0) @@ -468,7 +469,7 @@ static int presub_helper( rc_get_opcode_info(reader.Inst->U.I.Opcode); for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { - if (&reader.Inst->U.I.SrcReg[src_index] == reader.Src) + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src) presub_replace(inst_add, reader.Inst, src_index); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index a21fe8d3df..582d73b61f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -27,6 +27,7 @@ #include "radeon_program_pair.h" +#include /** * Return the source slot where we installed the given register access, @@ -225,3 +226,18 @@ unsigned int rc_source_type_that_arg_reads( } return ret; } + +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg) +{ + unsigned int type = rc_source_type_that_arg_reads(arg->Source, + arg->Swizzle); + if (type & RC_PAIR_SOURCE_RGB) { + return &pair_inst->RGB.Src[arg->Source]; + } else if (type & RC_PAIR_SOURCE_ALPHA) { + return &pair_inst->Alpha.Src[arg->Source]; + } else { + return NULL; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 54d44a2098..54ca56762b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -118,6 +118,10 @@ void rc_pair_foreach_source_that_rgb_reads( unsigned int rc_source_type_that_arg_reads( unsigned int source, unsigned int swizzle); + +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg); /*@}*/ -- cgit v1.2.3 From 255860113f12062c7341c012e6d9a3e6d834ab98 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 13 Nov 2010 17:00:45 -0800 Subject: r300/compiler: Handle BREAK and CONTINUE in rc_get_readers() --- src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index d757b1715c..2ce0c6e2b6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -665,6 +665,8 @@ static void get_readers_for_single_write( * reader before we get to the BGNLOOP, we must abort * unless there is another writer between that reader * and the BGNLOOP. */ + case RC_OPCODE_BRK: + case RC_OPCODE_CONT: d->ReaderData->Abort = 1; return; case RC_OPCODE_IF: -- cgit v1.2.3 From 681f56af807ed9e2a930a595ef509ee814d130dd Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 6 Nov 2010 11:30:27 -0700 Subject: r300/compiler: Track readers through branches in rc_get_readers() --- .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 5 +- src/mesa/drivers/dri/r300/compiler/radeon_code.h | 2 + .../drivers/dri/r300/compiler/radeon_dataflow.c | 92 ++++++++++++++-------- .../drivers/dri/r300/compiler/radeon_optimize.c | 4 +- 4 files changed, 65 insertions(+), 38 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 0311e7cf9a..5da82d90f6 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -45,9 +45,6 @@ #include "radeon_program_pair.h" -#define MAX_BRANCH_DEPTH_FULL 32 -#define MAX_BRANCH_DEPTH_PARTIAL 4 - #define PROG_CODE \ struct r500_fragment_program_code *code = &c->code->code.r500 @@ -509,7 +506,7 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst break; } case RC_OPCODE_IF: - if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { + if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); return; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index cfb6df2cd7..b69e81698a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -34,6 +34,8 @@ #define R500_PFS_MAX_INST 512 #define R500_PFS_NUM_TEMP_REGS 128 #define R500_PFS_NUM_CONST_REGS 256 +#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 +#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 2ce0c6e2b6..600be8b886 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -461,6 +461,12 @@ static rc_opcode get_flow_control_inst(struct rc_instruction * inst) } +struct branch_write_mask { + unsigned int IfWriteMask:4; + unsigned int ElseWriteMask:4; + unsigned int HasElse:1; +}; + union get_readers_read_cb { rc_read_src_fn I; rc_pair_read_arg_fn P; @@ -476,6 +482,8 @@ struct get_readers_callback_data { unsigned int DstIndex; unsigned int DstMask; unsigned int AliveWriteMask; + /* For convenience, this is indexed starting at 1 */ + struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; }; static void add_reader( @@ -521,12 +529,12 @@ static unsigned int get_readers_read_callback( /* If we make it this far, it means that this source reads from the * same register written to by d->ReaderData->Writer. */ - if (cb_data->ReaderData->AbortOnRead) { + read_mask = rc_swizzle_to_writemask(swizzle); + if (cb_data->ReaderData->AbortOnRead & read_mask) { cb_data->ReaderData->Abort = 1; return shared_mask; } - read_mask = rc_swizzle_to_writemask(swizzle); /* XXX The behavior in this case should be configurable. */ if ((read_mask & cb_data->AliveWriteMask) != read_mask) { cb_data->ReaderData->Abort = 1; @@ -605,22 +613,8 @@ static void get_readers_write_callback( if (index == d->DstIndex && file == d->DstFile) { unsigned int shared_mask = mask & d->DstMask; - if (d->ReaderData->InElse) { - if (shared_mask & d->AliveWriteMask) { - /* We set AbortOnRead here because the - * destination register of d->ReaderData->Writer - * is written to in both the IF and the - * ELSE block of this IF/ELSE statement. - * This means that readers of this - * destination register that follow this IF/ELSE - * statement use the value of different - * instructions depending on the control flow - * decisions made by the program. */ - d->ReaderData->AbortOnRead = 1; - } - } else { - d->AliveWriteMask &= ~shared_mask; - } + d->ReaderData->AbortOnRead &= ~shared_mask; + d->AliveWriteMask &= ~shared_mask; } if(d->WriteCB) @@ -645,6 +639,7 @@ static void get_readers_for_single_write( d->DstIndex = dst_index; d->DstMask = dst_mask; d->AliveWriteMask = dst_mask; + memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); if (!dst_mask) return; @@ -670,21 +665,53 @@ static void get_readers_for_single_write( d->ReaderData->Abort = 1; return; case RC_OPCODE_IF: - /* XXX We can do better here, but this will have to - * do until this dataflow analysis is more mature. */ - d->ReaderData->Abort = 1; branch_depth++; + if (branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + return; + } + d->BranchMasks[branch_depth].IfWriteMask = + d->AliveWriteMask; break; case RC_OPCODE_ELSE: - if (branch_depth == 0) + if (branch_depth == 0) { d->ReaderData->InElse = 1; + } else { + unsigned int temp_mask = d->AliveWriteMask; + d->AliveWriteMask = + d->BranchMasks[branch_depth].IfWriteMask; + d->BranchMasks[branch_depth].ElseWriteMask = + temp_mask; + d->BranchMasks[branch_depth].HasElse = 1; + } break; case RC_OPCODE_ENDIF: if (branch_depth == 0) { - d->ReaderData->AbortOnRead = 1; + d->ReaderData->AbortOnRead = d->AliveWriteMask; d->ReaderData->InElse = 0; } else { + struct branch_write_mask * masks = + &d->BranchMasks[branch_depth]; + + if (masks->HasElse) { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask + & ~masks->ElseWriteMask; + d->AliveWriteMask = masks->IfWriteMask + ^ ((masks->IfWriteMask ^ + masks->ElseWriteMask) + & (masks->IfWriteMask + ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask + & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + + } + memset(masks, 0, + sizeof(struct branch_write_mask)); branch_depth--; } break; @@ -692,21 +719,22 @@ static void get_readers_for_single_write( break; } - if (!d->ReaderData->InElse) { - if (tmp->Type == RC_INSTRUCTION_NORMAL) { - rc_for_all_reads_src(tmp, - get_readers_normal_read_callback, d); - } else { - rc_pair_for_all_reads_arg(tmp, - get_readers_pair_read_callback, d); - } + if (d->ReaderData->InElse) + continue; + + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, + get_readers_normal_read_callback, d); + } else { + rc_pair_for_all_reads_arg(tmp, + get_readers_pair_read_callback, d); } rc_for_all_writes_mask(tmp, get_readers_write_callback, d); if (d->ReaderData->Abort) return; - if (!d->AliveWriteMask) + if (branch_depth == 0 && !d->AliveWriteMask) return; } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 0f707d8c52..2eb548474f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -112,11 +112,11 @@ static void src_clobbered_reads_cb( && src->Index == sc_data->Index && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { - sc_data->ReaderData->AbortOnRead = 1; + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; } if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { - sc_data->ReaderData->AbortOnRead = 1; + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; } } -- cgit v1.2.3 From ddceededf850c942c1bb8185ec7dc0c203d3bad6 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 29 Sep 2010 23:52:49 -0700 Subject: r300/compiler: Convert RGB to alpha in the scheduler --- .../dri/r300/compiler/radeon_pair_schedule.c | 310 +++++++++++++++++++-- .../drivers/dri/r300/compiler/radeon_program.c | 11 + .../drivers/dri/r300/compiler/radeon_program.h | 15 + .../dri/r300/compiler/radeon_program_print.c | 1 + 4 files changed, 320 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index f760a9023d..cce253f11e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -54,6 +54,11 @@ struct schedule_instruction { * this instruction can be scheduled. */ unsigned int NumDependencies:5; + + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; }; @@ -94,6 +99,16 @@ struct register_state { struct reg_value * Values[4]; }; +struct remap_reg { + struct rc_instruciont * Inst; + unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int OldSwizzle:3; + unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int NewSwizzle:3; + unsigned int OnlyTexReads:1; + struct remap_reg * Next; +}; + struct schedule_state { struct radeon_compiler * C; struct schedule_instruction * Current; @@ -516,6 +531,222 @@ static void presub_nop(struct rc_instruction * emitted) { } } } + +static void rgb_to_alpha_remap ( + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + rc_register_file old_file, + rc_swizzle old_swz, + unsigned int new_index) +{ + int new_src_index; + unsigned int i; + struct rc_pair_instruction_source * old_src = + rc_pair_get_src(&inst->U.P, arg); + if (!old_src) { + return; + } + + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + memset(old_src, 0, sizeof(struct rc_pair_instruction_source)); + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, + old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. */ + if (new_src_index < 0) { + assert(0); + return; + } + + arg->Source = new_src_index; +} + +static int can_remap(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static int can_convert_opcode_to_alpha(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DPH: + return 0; + default: + return 1; + } +} + +static void is_rgb_to_alpha_possible( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int chan_count = 0; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data * reader_data = userdata; + + if (!can_remap(inst->U.P.RGB.Opcode) + || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } + + if (!src) + return; + + /* XXX There are some cases where we can still do the conversion if + * a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } + + /* Make sure the source only reads from one component. + * XXX We should allow the source to read from the same component twice. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch(swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + chan_count++; + break; + default: + break; + } + } + if (chan_count > 1) { + reader_data->Abort = 1; + return; + } + + /* Make sure there are enough alpha sources. + * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the subsitution, even if all 3 alpha sources are being used.*/ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } +} + +static int convert_rgb_to_alpha( + struct schedule_state * s, + struct schedule_instruction * sched_inst) +{ + struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info * info = + rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; + + if (sched_inst->GlobalReaders.Abort) + return 0; + + if (!pair_inst->RGB.WriteMask) + return 0; + + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) + || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } + + assert(sched_inst->NumWriteValues == 1); + + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } + + /* We start at the old index, because if we can reuse the same + * register and just change the swizzle then it is more likely we + * will be able to convert all the readers. */ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value ** new_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value ** old_regvalp = + get_reg_valuep(s, + RC_FILE_TEMPORARY, + pair_inst->RGB.DestIndex, + rc_mask_to_swz(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + *old_regvalp = NULL; + new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + break; + } + } + if (new_index < 0) { + return 0; + } + + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = 1; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = swz; + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + + for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(reader.Inst, reader.U.Arg, + RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; +} + /** * Find a good ALU instruction or pair of ALU instruction and emit it. * @@ -527,24 +758,16 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor { struct schedule_instruction * sinst; - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - if (s->ReadyFullALU) { - sinst = s->ReadyFullALU; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - sinst = s->ReadyAlpha; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); } else { struct schedule_instruction **prgb; struct schedule_instruction **palpha; - + struct schedule_instruction *prev; +pair: /* Some pairings might fail because they require too * many source slots; try all possible pairings if necessary */ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { @@ -563,10 +786,43 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor goto success; } } - - /* No success in pairing; just take the first RGB instruction */ - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; + prev = NULL; + /* No success in pairing, now try to convert one of the RGB + * instructions to an Alpha so we can pair it with another RGB. + */ + if (s->ReadyRGB && s->ReadyRGB->NextReady) { + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + if ((*prgb)->NumWriteValues == 1) { + struct schedule_instruction * prgb_next; + if (!convert_rgb_to_alpha(s, *prgb)) + goto cont_loop; + prgb_next = (*prgb)->NextReady; + /* Add instruction to the Alpha ready list. */ + (*prgb)->NextReady = s->ReadyAlpha; + s->ReadyAlpha = *prgb; + /* Remove instruction from the RGB ready list.*/ + if (prev) + prev->NextReady = prgb_next; + else + s->ReadyRGB = prgb_next; + goto pair; + } +cont_loop: + prev = *prgb; + } + } + /* Still no success in pairing, just take the first RGB + * or alpha instruction. */ + if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else if (s->ReadyAlpha) { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } else { + /*XXX Something real bad has happened. */ + assert(0); + } rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); @@ -655,6 +911,16 @@ static void scan_write(void * data, struct rc_instruction * inst, } } +static void is_rgb_to_alpha_possible_normal( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = userdata; + reader_data->Abort = 1; + +} + static void schedule_block(struct r300_fragment_program_compiler * c, struct rc_instruction * begin, struct rc_instruction * end) { @@ -686,6 +952,11 @@ static void schedule_block(struct r300_fragment_program_compiler * c, if (!s.Current->NumDependencies) instruction_ready(&s, s.Current); + + /* Get global readers for possible RGB->Alpha conversion. */ + rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); } /* Temporarily unlink all instructions */ @@ -714,8 +985,13 @@ static int is_controlflow(struct rc_instruction * inst) void rc_pair_schedule(struct radeon_compiler *cc, void *user) { + struct schedule_state s; + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; struct rc_instruction * inst = c->Base.Program.Instructions.Next; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; while(inst != &c->Base.Program.Instructions) { struct rc_instruction * first; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index 24b685fbeb..14dade9be7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -182,3 +182,14 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c) return ip; } + +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch(mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + default: return RC_SWIZZLE_UNUSED; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index f0a77d7b53..a4d50d3663 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -191,6 +191,20 @@ static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) return ret; } +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +static inline rc_swizzle rc_mask_to_swz(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + default: assert(0); + } + return RC_SWIZZLE_UNUSED; +} struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); static inline void reset_srcreg(struct rc_src_register* reg) @@ -233,4 +247,5 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c); void rc_print_program(const struct rc_program *prog); +rc_swizzle rc_mask_to_swizzle(unsigned int mask); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index 618ab5a099..ae13f6742f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -129,6 +129,7 @@ static char rc_swizzle_char(unsigned int swz) case RC_SWIZZLE_HALF: return 'H'; case RC_SWIZZLE_UNUSED: return '_'; } + fprintf(stderr, "bad swz: %u\n", swz); return '?'; } -- cgit v1.2.3 From bbe49bc585c4fed46f55d184b463d13bddd97f1b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 8 Nov 2010 18:49:44 -0800 Subject: r300/compiler: Use presubtract operations as much as possible Previously, presubtract operations where only being used by instructions with less than three source source registers. --- .../dri/r300/compiler/radeon_compiler_util.c | 122 +++++++++++++++++++++ .../dri/r300/compiler/radeon_compiler_util.h | 15 +++ .../drivers/dri/r300/compiler/radeon_dataflow.c | 16 ++- .../drivers/dri/r300/compiler/radeon_optimize.c | 36 +++--- .../dri/r300/compiler/radeon_pair_schedule.c | 17 +-- .../dri/r300/compiler/radeon_program_constants.h | 5 + .../dri/r300/compiler/radeon_program_pair.c | 52 ++++----- .../dri/r300/compiler/radeon_program_pair.h | 8 -- 8 files changed, 204 insertions(+), 67 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index 97f4c75849..2b8d284ce9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -31,6 +31,8 @@ #include "radeon_compiler_util.h" +#include "radeon_compiler.h" +#include "radeon_dataflow.h" /** */ unsigned int rc_swizzle_to_writemask(unsigned int swz) @@ -59,3 +61,123 @@ unsigned int rc_src_reads_dst_mask( } return dst_mask & rc_swizzle_to_writemask(src_swz); } + +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; + + for(chan = 0; chan < channels; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; +} + +unsigned int rc_source_type_mask(unsigned int mask) +{ + unsigned int ret = RC_SOURCE_NONE; + + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; + + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; + + return ret; +} + +struct can_use_presub_data { + struct rc_src_register RemoveSrcs[3]; + unsigned int RGBCount; + unsigned int AlphaCount; +}; + +static void can_use_presub_read_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct can_use_presub_data * d = userdata; + unsigned int src_type = rc_source_type_mask(mask); + unsigned int i; + + if (file == RC_FILE_NONE) + return; + + for(i = 0; i < 3; i++) { + if (d->RemoveSrcs[i].File == file + && d->RemoveSrcs[i].Index == index) { + src_type &= + ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4); + } + } + + if (src_type & RC_SOURCE_RGB) + d->RGBCount++; + + if (src_type & RC_SOURCE_ALPHA) + d->AlphaCount++; +} + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1) +{ + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int presub_src_type = rc_source_type_mask(presub_writemask); + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + + if (presub_op == RC_PRESUB_NONE) { + return 1; + } + + if (info->HasTexture) { + return 0; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. + * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } + + memset(&d, 0, sizeof(d)); + d.RemoveSrcs[0] = replace_reg; + d.RemoveSrcs[1] = presub_src0; + d.RemoveSrcs[2] = presub_src1; + + rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d); + + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + + if ((presub_src_type & RC_SOURCE_RGB) + && d.RGBCount + num_presub_srcs > 3) { + return 0; + } + + if ((presub_src_type & RC_SOURCE_ALPHA) + && d.AlphaCount + num_presub_srcs > 3) { + return 0; + } + + return 1; +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index 1a14e7cb0e..e50dfbd4fb 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -3,6 +3,9 @@ #ifndef RADEON_PROGRAM_UTIL_H #define RADEON_PROGRAM_UTIL_H +struct rc_instruction; +struct rc_src_register; + unsigned int rc_swizzle_to_writemask(unsigned int swz); unsigned int rc_src_reads_dst_mask( @@ -13,4 +16,16 @@ unsigned int rc_src_reads_dst_mask( unsigned int dst_idx, unsigned int dst_mask); +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels); + +unsigned int rc_source_type_mask(unsigned int mask); + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1); + #endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 600be8b886..9df07edf2b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -139,9 +139,17 @@ static void pair_sub_for_all_args( const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); for(i = 0; i < info->NumSrcRegs; i++) { - unsigned int src_type = rc_source_type_that_arg_reads( - sub->Arg[i].Source, sub->Arg[i].Swizzle); - if (src_type == RC_PAIR_SOURCE_NONE) + unsigned int src_type; + unsigned int channels = 0; + if (&fullinst->U.P.RGB == sub) + channels = 3; + else if (&fullinst->U.P.Alpha == sub) + channels = 1; + + assert(channels > 0); + src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels); + + if (src_type == RC_SOURCE_NONE) continue; if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { @@ -149,7 +157,7 @@ static void pair_sub_for_all_args( unsigned int presub_src_count; struct rc_pair_instruction_source * src_array; unsigned int j; - if (src_type & RC_PAIR_SOURCE_RGB) { + if (src_type & RC_SOURCE_RGB) { presub_type = fullinst-> U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; src_array = fullinst->U.P.RGB.Src; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 2eb548474f..27b10ffbd6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -71,12 +71,13 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, { rc_register_file file = src->File; struct rc_reader_data * reader_data = data; - const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - /* It is possible to do copy propigation in this situation, - * just not right now, see peephole_add_presub_inv() */ - if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE && - (info->NumSrcRegs > 2 || info->HasTexture)) { + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + *src, + reader_data->Writer->U.I.PreSub.SrcReg[0], + reader_data->Writer->U.I.PreSub.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -424,24 +425,13 @@ static void presub_scan_read( struct rc_src_register * src) { struct rc_reader_data * reader_data = data; - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - /* XXX: There are some situations where instructions - * with more than 2 src registers can use the - * presubtract select, but to keep things simple we - * will disable presubtract on these instructions for - * now. */ - if (info->NumSrcRegs > 2 || info->HasTexture) { - reader_data->Abort = 1; - return; - } + rc_presubtract_op * presub_opcode = reader_data->CbData; - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. - * XXX For now we will limit instructions to only one presubtract - * value.*/ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + *src, + reader_data->Writer->U.I.SrcReg[0], + reader_data->Writer->U.I.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -455,7 +445,9 @@ static int presub_helper( { struct rc_reader_data reader_data; unsigned int i; + rc_presubtract_op cb_op = presub_opcode; + reader_data.CbData = &cb_op; rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, is_src_clobbered_scan_write); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index cce253f11e..cbb5ef6237 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -30,6 +30,7 @@ #include #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" @@ -301,12 +302,12 @@ static int merge_presub_sources( assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); switch(type) { - case RC_PAIR_SOURCE_RGB: + case RC_SOURCE_RGB: is_rgb = 1; is_alpha = 0; dst_sub = &dst_full->RGB; break; - case RC_PAIR_SOURCE_ALPHA: + case RC_SOURCE_ALPHA: is_rgb = 0; is_alpha = 1; dst_sub = &dst_full->Alpha; @@ -347,6 +348,8 @@ static int merge_presub_sources( continue; free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; one_way = 1; } else { dst_sub->Src[free_source] = temp; @@ -362,11 +365,11 @@ static int merge_presub_sources( for(arg = 0; arg < info->NumSrcRegs; arg++) { /*If this arg does not read from an rgb source, * do nothing. */ - if (!(rc_source_type_that_arg_reads( - dst_full->RGB.Arg[arg].Source, - dst_full->RGB.Arg[arg].Swizzle) & type)) { + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle, + 3) & type)) { continue; } + if (dst_full->RGB.Arg[arg].Source == srcp_src) dst_full->RGB.Arg[arg].Source = free_source; /* We need to do this just in case register @@ -398,13 +401,13 @@ static int destructive_merge_instructions( /* Merge the rgb presubtract registers. */ if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { return 0; } } /* Merge the alpha presubtract registers */ if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){ + if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ return 0; } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 9dcd44c522..45f79ece5b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -181,4 +181,9 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ return 0; } } + +#define RC_SOURCE_NONE 0x0 +#define RC_SOURCE_RGB 0x1 +#define RC_SOURCE_ALPHA 0x2 + #endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 582d73b61f..5905d26e52 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -27,6 +27,8 @@ #include "radeon_program_pair.h" +#include "radeon_compiler_util.h" + #include /** @@ -205,37 +207,35 @@ void rc_pair_foreach_source_that_rgb_reads( } } -/*return 0 for rgb, 1 for alpha -1 for error. */ - -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle) -{ - unsigned int chan; - unsigned int swz = RC_SWIZZLE_UNUSED; - unsigned int ret = RC_PAIR_SOURCE_NONE; - - for(chan = 0; chan < 3; chan++) { - swz = GET_SWZ(swizzle, chan); - if (swz == RC_SWIZZLE_W) { - ret |= RC_PAIR_SOURCE_ALPHA; - } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y - || swz == RC_SWIZZLE_Z) { - ret |= RC_PAIR_SOURCE_RGB; - } - } - return ret; -} - struct rc_pair_instruction_source * rc_pair_get_src( struct rc_pair_instruction * pair_inst, struct rc_pair_instruction_arg * arg) { - unsigned int type = rc_source_type_that_arg_reads(arg->Source, - arg->Swizzle); - if (type & RC_PAIR_SOURCE_RGB) { + unsigned int i, type; + unsigned int channels = 0; + + for(i = 0; i < 3; i++) { + if (arg == pair_inst->RGB.Arg + i) { + channels = 3; + break; + } + } + + if (channels == 0) { + for (i = 0; i < 3; i++) { + if (arg == pair_inst->Alpha.Arg + i) { + channels = 1; + break; + } + } + } + + assert(channels > 0); + type = rc_source_type_swz(arg->Swizzle, channels); + + if (type & RC_SOURCE_RGB) { return &pair_inst->RGB.Src[arg->Source]; - } else if (type & RC_PAIR_SOURCE_ALPHA) { + } else if (type & RC_SOURCE_ALPHA) { return &pair_inst->Alpha.Src[arg->Source]; } else { return NULL; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 54ca56762b..ccf7a0070c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -55,10 +55,6 @@ struct radeon_compiler; */ #define RC_PAIR_PRESUB_SRC 3 -#define RC_PAIR_SOURCE_NONE 0x0 -#define RC_PAIR_SOURCE_RGB 0x1 -#define RC_PAIR_SOURCE_ALPHA 0x2 - struct rc_pair_instruction_source { unsigned int Used:1; unsigned int File:3; @@ -115,10 +111,6 @@ void rc_pair_foreach_source_that_rgb_reads( void * data, rc_pair_foreach_src_fn cb); -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle); - struct rc_pair_instruction_source * rc_pair_get_src( struct rc_pair_instruction * pair_inst, struct rc_pair_instruction_arg * arg); -- cgit v1.2.3 From 8833f53e659e079e7ab74bb9197f9b44b1eeefe0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 10 Nov 2010 21:34:18 -0800 Subject: r300/compiler: Enable rename_reg pass for r500 cards In addition, the rename_reg pass has been rewritten to use rc_get_readers(). --- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 2 +- .../drivers/dri/r300/compiler/radeon_rename_regs.c | 93 ++++------------------ 2 files changed, 16 insertions(+), 79 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 2f130198d3..7b9c316794 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -137,7 +137,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too * many texture indirections.*/ - {"register rename", 1, !is_r500, rc_rename_regs, NULL}, + {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, {"pair translate", 1, 1, rc_pair_translate, NULL}, {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, {"register allocation", 1, opt, rc_pair_regalloc, NULL}, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 60e228be5b..87ec3dca06 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -34,99 +34,36 @@ #include "radeon_compiler.h" #include "radeon_dataflow.h" -struct reg_rename { - int old_index; - int new_index; - int temp_index; -}; - -static void rename_reg(void * data, struct rc_instruction * inst, - rc_register_file * file, unsigned int * index) -{ - struct reg_rename *r = data; - - if(r->old_index == *index && *file == RC_FILE_TEMPORARY) { - *index = r->new_index; - } - else if(r->new_index == *index && *file == RC_FILE_TEMPORARY) { - *index = r->temp_index; - } -} - -static void rename_all( - struct radeon_compiler *c, - struct rc_instruction * start, - unsigned int old, - unsigned int new, - unsigned int temp) -{ - struct rc_instruction * inst; - struct reg_rename r; - r.old_index = old; - r.new_index = new; - r.temp_index = temp; - for(inst = start; inst != &c->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, rename_reg, &r); - } -} - /** * This function renames registers in an attempt to get the code close to * SSA form. After this function has completed, most of the register are only - * written to one time, with a few exceptions. For example, this block of code - * will not be modified by this function: - * Mov Temp[0].x Const[0].x - * Mov Temp[0].y Const[0].y - * Basically, destination registers will be renamed if: - * 1. There have been no previous writes to that register - * or - * 2. If the instruction is writting to the exact components (no more, no less) - * of a register that has been written to by previous instructions. + * written to one time, with a few exceptions. * * This function assumes all the instructions are still of type * RC_INSTRUCTION_NORMAL. */ void rc_rename_regs(struct radeon_compiler *c, void *user) { - unsigned int cur_index = 0; - unsigned int icount; + unsigned int new_index, i; struct rc_instruction * inst; - unsigned int * masks; - - /* The number of instructions in the program is also the maximum - * number of temp registers that could potentially be used. */ - icount = rc_recompute_ips(c); - masks = memory_pool_malloc(&c->Pool, icount * sizeof(unsigned int)); - memset(masks, 0, icount * sizeof(unsigned int)); + struct rc_reader_data reader_data; for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * info; - unsigned int old_index, temp_index; - struct rc_dst_register * dst; - if(inst->Type != RC_INSTRUCTION_NORMAL) { - rc_error(c, "%s only works with normal instructions.", - __FUNCTION__); - return; - } - dst = &inst->U.I.DstReg; - info = rc_get_opcode_info(inst->U.I.Opcode); - if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) { + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) continue; + + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + continue; + + new_index = rc_find_free_temporary(c); + reader_data.Writer->U.I.DstReg.Index = new_index; + for(i = 0; i < reader_data.ReaderCount; i++) { + reader_data.Readers[i].U.Src->Index = new_index; } - if(dst->Index >= icount || !masks[dst->Index] || - masks[dst->Index] == dst->WriteMask) { - old_index = dst->Index; - /* We need to set dst->Index here so get free temporary - * will work. */ - dst->Index = cur_index++; - temp_index = rc_find_free_temporary(c); - rename_all(c, inst->Next, old_index, - dst->Index, temp_index); - } - assert(dst->Index < icount); - masks[dst->Index] |= dst->WriteMask; } } -- cgit v1.2.3 From 1b6ed809729dd3adee4adfa87c227bbd98d9f23a Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 12 Nov 2010 00:59:13 -0800 Subject: r300/compiler: Add a more efficient version of rc_find_free_temporary() --- .../drivers/dri/r300/compiler/radeon_program.c | 104 ++++++++++++++++----- .../drivers/dri/r300/compiler/radeon_program.h | 11 +++ .../drivers/dri/r300/compiler/radeon_rename_regs.c | 18 +++- 3 files changed, 110 insertions(+), 23 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index 14dade9be7..d7bedc5729 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -30,6 +30,7 @@ #include #include "radeon_compiler.h" +#include "radeon_dataflow.h" /** @@ -91,37 +92,98 @@ struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register return tmp; } -unsigned int rc_find_free_temporary(struct radeon_compiler * c) +struct get_used_temporaries_data { + unsigned char * Used; + unsigned int UsedLength; +}; + +static void get_used_temporaries_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) { - char used[RC_REGISTER_MAX_INDEX]; - unsigned int i; - struct rc_instruction * rcinst; + struct get_used_temporaries_data * d = userdata; - memset(used, 0, sizeof(used)); + if (file != RC_FILE_TEMPORARY) + return; - for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { - const struct rc_sub_instruction *inst = &rcinst->U.I; - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); - unsigned int k; + if (index >= d->UsedLength) + return; - for (k = 0; k < opcode->NumSrcRegs; k++) { - if (inst->SrcReg[k].File == RC_FILE_TEMPORARY) - used[inst->SrcReg[k].Index] = 1; - } + d->Used[index] |= mask; +} - if (opcode->HasDstReg) { - if (inst->DstReg.File == RC_FILE_TEMPORARY) - used[inst->DstReg.Index] = 1; - } +/** + * This function fills in the parameter 'used' with a writemask that + * represent which components of each temporary register are used by the + * program. This is meant to be combined with rc_find_free_temporary_list as a + * more efficient version of rc_find_free_temporary. + * @param used The function does not initialize this parameter. + */ +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length) +{ + struct rc_instruction * inst; + struct get_used_temporaries_data d; + d.Used = used; + d.UsedLength = used_length; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d); + rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d); } +} - for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (!used[i]) +/* Search a list of used temporaries for a free one + * \sa rc_get_used_temporaries + * @note If this functions finds a free temporary, it will mark it as used + * in the used temporary list (param 'used') + * @param used list of used temporaries + * @param used_length number of items in param 'used' + * @param mask which components must be free in the temporary index that is + * returned. + * @return -1 If there are no more free temporaries, otherwise the index of + * a temporary register where the components specified in param 'mask' are + * not being used. + */ +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask) +{ + int i; + for(i = 0; i < used_length; i++) { + if ((~used[i] & mask) == mask) { + used[i] |= mask; return i; + } } + return -1; +} - rc_error(c, "Ran out of temporary registers\n"); - return 0; +unsigned int rc_find_free_temporary(struct radeon_compiler * c) +{ + unsigned char used[RC_REGISTER_MAX_INDEX]; + int free; + + memset(used, 0, sizeof(used)); + + rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX); + + free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX, + RC_MASK_XYZW); + if (free < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return 0; + } + return free; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index a4d50d3663..be078b4f4f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -236,6 +236,17 @@ void rc_local_transform( struct radeon_compiler *c, void *user); +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length); + +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask); + unsigned int rc_find_free_temporary(struct radeon_compiler * c); struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 87ec3dca06..88165f7895 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -33,6 +33,7 @@ #include "radeon_compiler.h" #include "radeon_dataflow.h" +#include "radeon_program.h" /** * This function renames registers in an attempt to get the code close to @@ -44,10 +45,17 @@ */ void rc_rename_regs(struct radeon_compiler *c, void *user) { - unsigned int new_index, i; + unsigned int i, used_length; + int new_index; struct rc_instruction * inst; struct rc_reader_data reader_data; + unsigned char * used; + used_length = 2 * rc_recompute_ips(c); + used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); + memset(used, 0, sizeof(unsigned char) * used_length); + + rc_get_used_temporaries(c, used, used_length); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { @@ -60,7 +68,13 @@ void rc_rename_regs(struct radeon_compiler *c, void *user) if (reader_data.Abort || reader_data.ReaderCount == 0) continue; - new_index = rc_find_free_temporary(c); + new_index = rc_find_free_temporary_list(c, used, used_length, + RC_MASK_XYZW); + if (new_index < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return; + } + reader_data.Writer->U.I.DstReg.Index = new_index; for(i = 0; i < reader_data.ReaderCount; i++) { reader_data.Readers[i].U.Src->Index = new_index; -- cgit v1.2.3 From 4265c2f81980eae37f6081979c65b2d1bae60b82 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 22 Nov 2010 23:48:47 -0800 Subject: r300/compiler: Don't allow presubtract sources to be remapped twice https://bugs.freedesktop.org/show_bug.cgi?id=31193 NOTE: This is a candidate for the 7.9 branch. --- src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 9df07edf2b..d0a64d936e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -347,6 +347,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + unsigned int remapped_presub = 0; if (opcode->HasDstReg) { rc_register_file file = inst->DstReg.File; @@ -366,6 +367,12 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, unsigned int i; unsigned int srcp_srcs = rc_presubtract_src_reg_count( inst->PreSub.Opcode); + /* Make sure we only remap presubtract sources once in + * case more than one source register reads the + * presubtract result. */ + if (remapped_presub) + continue; + for(i = 0; i < srcp_srcs; i++) { file = inst->PreSub.SrcReg[i].File; index = inst->PreSub.SrcReg[i].Index; @@ -373,7 +380,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, inst->PreSub.SrcReg[i].File = file; inst->PreSub.SrcReg[i].Index = index; } - + remapped_presub = 1; } else { cb(userdata, fullinst, &file, &index); -- cgit v1.2.3 From 995de71550d124dd93186d7264b2e6fc34a73d57 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 24 Nov 2010 23:00:03 -0800 Subject: r300/compiler: Move declaration before code. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes this GCC warning with linux-x86 build. radeon_pair_regalloc.c: In function ‘compute_live_intervals’: radeon_pair_regalloc.c:221: warning: ISO C90 forbids mixed declarations and code --- src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index 3f880c88fa..3874c7e418 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -217,8 +217,8 @@ static void compute_live_intervals(struct radeon_compiler *c, * instruction is used as the end of the live interval and * the BGNLOOP instruction is used as the beginning. */ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { - s->BeginLoop = inst->IP; int loops = 1; + s->BeginLoop = inst->IP; struct rc_instruction * tmp; for(tmp = inst->Next; tmp != &s->C->Program.Instructions; -- cgit v1.2.3 From 2cc79acc1ab5cdaadc427526a146c2c27ee0f906 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 24 Nov 2010 23:03:26 -0800 Subject: r300/compiler: Move declaration before code. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes this GCC warning with linux-x86 build. radeon_pair_regalloc.c: In function ‘compute_live_intervals’: radeon_pair_regalloc.c:222: warning: ISO C90 forbids mixed declarations and code --- src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index 3874c7e418..d53181e1f7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -218,8 +218,8 @@ static void compute_live_intervals(struct radeon_compiler *c, * the BGNLOOP instruction is used as the beginning. */ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { int loops = 1; - s->BeginLoop = inst->IP; struct rc_instruction * tmp; + s->BeginLoop = inst->IP; for(tmp = inst->Next; tmp != &s->C->Program.Instructions; tmp = tmp->Next) { -- cgit v1.2.3 From ae3e58d973c6fbc10ca1e9880f40aec3a3ec1ecc Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 25 Nov 2010 00:35:30 +0100 Subject: r300/compiler: fix rc_rewrite_depth_out for it to work with any instruction It looks like the function was originally written for ARB_fragment_program. NOTE: This is a candidate for the 7.9 branch. --- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 29 ++++++---------------- 1 file changed, 8 insertions(+), 21 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 7b9c316794..ecc5e8219f 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -54,6 +54,8 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { struct rc_sub_instruction * inst = &rci->U.I; + unsigned i; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) continue; @@ -65,27 +67,12 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) continue; } - switch (inst->Opcode) { - case RC_OPCODE_FRC: - case RC_OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case RC_OPCODE_ADD: - case RC_OPCODE_MAX: - case RC_OPCODE_MIN: - case RC_OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case RC_OPCODE_CMP: - case RC_OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; + if (!info->IsComponentwise) { + continue; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); } } } -- cgit v1.2.3 From 7c294462324fd3dc1951fc003290b926fbc0b07e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 25 Nov 2010 01:25:03 +0100 Subject: r300/compiler: remove duplicate function rc_mask_to_swz --- src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c | 2 +- src/mesa/drivers/dri/r300/compiler/radeon_program.c | 11 ----------- src/mesa/drivers/dri/r300/compiler/radeon_program.h | 3 +-- 3 files changed, 2 insertions(+), 14 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index cbb5ef6237..9beb5d6357 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -701,7 +701,7 @@ static int convert_rgb_to_alpha( get_reg_valuep(s, RC_FILE_TEMPORARY, pair_inst->RGB.DestIndex, - rc_mask_to_swz(old_mask)); + rc_mask_to_swizzle(old_mask)); new_index = i; *new_regvalp = *old_regvalp; *old_regvalp = NULL; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index d7bedc5729..707882882a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -244,14 +244,3 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c) return ip; } - -rc_swizzle rc_mask_to_swizzle(unsigned int mask) -{ - switch(mask) { - case RC_MASK_X: return RC_SWIZZLE_X; - case RC_MASK_Y: return RC_SWIZZLE_Y; - case RC_MASK_Z: return RC_SWIZZLE_Z; - case RC_MASK_W: return RC_SWIZZLE_W; - default: return RC_SWIZZLE_UNUSED; - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index be078b4f4f..913815b24d 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -194,14 +194,13 @@ static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) /** * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W */ -static inline rc_swizzle rc_mask_to_swz(unsigned int mask) +static inline rc_swizzle rc_mask_to_swizzle(unsigned int mask) { switch (mask) { case RC_MASK_X: return RC_SWIZZLE_X; case RC_MASK_Y: return RC_SWIZZLE_Y; case RC_MASK_Z: return RC_SWIZZLE_Z; case RC_MASK_W: return RC_SWIZZLE_W; - default: assert(0); } return RC_SWIZZLE_UNUSED; } -- cgit v1.2.3 From ea2f56b49026dce4f0e507b587fd58b18b5de1ec Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 25 Nov 2010 04:34:31 +0100 Subject: r300/compiler: add a function for swizzling a mask --- src/mesa/drivers/dri/r300/compiler/radeon_optimize.c | 7 +------ src/mesa/drivers/dri/r300/compiler/radeon_program.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 27b10ffbd6..44f4c0fbdc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -54,12 +54,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct combine.Negate = outer.Negate; } else { combine.Abs = inner.Abs; - combine.Negate = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(outer.Swizzle, chan); - if (swz < 4) - combine.Negate |= GET_BIT(inner.Negate, swz) << chan; - } + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); combine.Negate ^= outer.Negate; } combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 913815b24d..772ea14df3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -204,6 +204,20 @@ static inline rc_swizzle rc_mask_to_swizzle(unsigned int mask) } return RC_SWIZZLE_UNUSED; } + +/* Reorder mask bits according to swizzle. */ +static inline unsigned swizzle_mask(unsigned swizzle, unsigned mask) +{ + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; +} + + struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); static inline void reset_srcreg(struct rc_src_register* reg) -- cgit v1.2.3 From e6e6fcd3a674429886aed499f9a63594aa5a0f58 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 25 Nov 2010 15:07:02 +0100 Subject: r300/compiler: move util functions to radeon_compiler_util The compiler seriously needs a cleanup as far as the arrangement of functions is concerned. It's hard to know whether some function was implemented or not because there are so many places to search in and it can be anywhere and named anyhow. --- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 1 + src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 1 + src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 1 + .../drivers/dri/r300/compiler/radeon_compiler.c | 1 + .../dri/r300/compiler/radeon_compiler_util.c | 85 ++++++++++++++++++++++ .../dri/r300/compiler/radeon_compiler_util.h | 16 ++++ .../drivers/dri/r300/compiler/radeon_program.c | 21 ------ .../drivers/dri/r300/compiler/radeon_program.h | 68 ----------------- .../drivers/dri/r300/compiler/radeon_program_alu.c | 1 + .../drivers/dri/r300/compiler/radeon_program_tex.c | 2 + 10 files changed, 108 insertions(+), 89 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index ecc5e8219f..23671e3cbc 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -24,6 +24,7 @@ #include +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index bf8341f017..a321e26fed 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -26,6 +26,7 @@ #include "../r300_reg.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_program_alu.h" #include "radeon_swizzle.h" diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 289bb87ae5..ef81be48f7 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -29,6 +29,7 @@ #include +#include "radeon_compiler_util.h" #include "../r300_reg.h" /** diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 4286baed0c..b306bd03ae 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -29,6 +29,7 @@ #include "radeon_dataflow.h" #include "radeon_program.h" #include "radeon_program_pair.h" +#include "radeon_compiler_util.h" void rc_init(struct radeon_compiler * c) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index 2b8d284ce9..bf393a9fb1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -48,6 +48,91 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz) return mask; } +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +{ + unsigned int ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +unsigned int combine_swizzles(unsigned int src, unsigned int swz) +{ + unsigned int ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + + return ret; +} + +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + } + return RC_SWIZZLE_UNUSED; +} + +/* Reorder mask bits according to swizzle. */ +unsigned swizzle_mask(unsigned swizzle, unsigned mask) +{ + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; +} + +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + +void reset_srcreg(struct rc_src_register* reg) +{ + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; +} + unsigned int rc_src_reads_dst_mask( rc_register_file src_file, unsigned int src_idx, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index e50dfbd4fb..461ab9ffb1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -8,6 +8,22 @@ struct rc_src_register; unsigned int rc_swizzle_to_writemask(unsigned int swz); +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, + rc_swizzle swz_z, rc_swizzle swz_w); + +unsigned int combine_swizzles(unsigned int src, unsigned int swz); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); + +unsigned swizzle_mask(unsigned swizzle, unsigned mask); + +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +void reset_srcreg(struct rc_src_register* reg); + unsigned int rc_src_reads_dst_mask( rc_register_file src_file, unsigned int src_idx, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index 707882882a..fe5756ebc4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -71,27 +71,6 @@ void rc_local_transform( } } -/** - * Left multiplication of a register with a swizzle - */ -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) -{ - struct rc_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.Negate = 0; - for(i = 0; i < 4; ++i) { - rc_swizzle swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; -} - struct get_used_temporaries_data { unsigned char * Used; unsigned int UsedLength; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 772ea14df3..df6c94b35f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -159,74 +159,6 @@ struct rc_program { struct rc_constant_list Constants; }; -static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) -{ - if (idx & 0x4) - return idx; - return GET_SWZ(swz, idx); -} - -static inline unsigned int combine_swizzles4(unsigned int src, - rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) -{ - unsigned int ret = 0; - - ret |= get_swz(src, swz_x); - ret |= get_swz(src, swz_y) << 3; - ret |= get_swz(src, swz_z) << 6; - ret |= get_swz(src, swz_w) << 9; - - return ret; -} - -static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) -{ - unsigned int ret = 0; - - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; - - return ret; -} - -/** - * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W - */ -static inline rc_swizzle rc_mask_to_swizzle(unsigned int mask) -{ - switch (mask) { - case RC_MASK_X: return RC_SWIZZLE_X; - case RC_MASK_Y: return RC_SWIZZLE_Y; - case RC_MASK_Z: return RC_SWIZZLE_Z; - case RC_MASK_W: return RC_SWIZZLE_W; - } - return RC_SWIZZLE_UNUSED; -} - -/* Reorder mask bits according to swizzle. */ -static inline unsigned swizzle_mask(unsigned swizzle, unsigned mask) -{ - unsigned ret = 0; - for (unsigned chan = 0; chan < 4; ++chan) { - unsigned swz = GET_SWZ(swizzle, chan); - if (swz < 4) - ret |= GET_BIT(mask, swz) << chan; - } - return ret; -} - - -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); - -static inline void reset_srcreg(struct rc_src_register* reg) -{ - memset(reg, 0, sizeof(struct rc_src_register)); - reg->Swizzle = RC_SWIZZLE_XYZW; -} - - /** * A transformation that can be passed to \ref rc_local_transform. * diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 39408845d5..106e03495d 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -36,6 +36,7 @@ #include "radeon_program_alu.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" static struct rc_instruction *emit1( diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 530afa5e08..f9d9f34b6a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -28,6 +28,8 @@ #include "radeon_program_tex.h" +#include "radeon_compiler_util.h" + /* Series of transformations to be done on textures. */ static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, -- cgit v1.2.3 From e6d798948e00e255b80a69562a7d262257f77ee5 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 1 Dec 2010 16:44:22 +0100 Subject: r300/compiler: implement and lower OPCODE_CLAMP Needed for st/vega. --- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 2 +- src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c | 7 +++++++ src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h | 3 +++ .../drivers/dri/r300/compiler/radeon_program_alu.c | 18 ++++++++++++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 33448bf0de..15a323989b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ /* gap */ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; - /* case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; */ + case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */ case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index da495a3afa..113b27632a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -66,6 +66,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .HasDstReg = 1, .IsComponentwise = 1 }, + { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, { .Opcode = RC_OPCODE_CMP, .Name = "CMP", diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index d3f639c870..7e66610127 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -50,6 +50,9 @@ typedef enum { /** vec4 instruction: dst.c = ceil(src0.c) */ RC_OPCODE_CEIL, + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ RC_OPCODE_CMP, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 106e03495d..01c2e74e7b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -217,6 +217,22 @@ static void transform_CEIL(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + int tempreg = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + static void transform_DP2(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -554,6 +570,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; @@ -782,6 +799,7 @@ int r300_transform_vertex_alu( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; -- cgit v1.2.3 From 23390e2f5cf1050f98c70c8603e374191e7d84e7 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 2 Dec 2010 15:56:38 +0100 Subject: r300/compiler: disable the swizzle lowering pass in vertex shaders It was a no-op because all swizzles are native there. --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index a321e26fed..dab593310e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -1063,7 +1063,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) {"dataflow optimize", 1, opt, rc_optimize, NULL}, /* This pass must be done after optimizations. */ {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, - {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, {"register allocation", 1, opt, allocate_temporary_registers, NULL}, {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, -- cgit v1.2.3 From 431b4c0c84424bb80820c3b189aed901fdb605b2 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 7 Dec 2010 21:45:34 +0100 Subject: r300/compiler: don't terminate regalloc if we surpass max temps limit The same check is already in a later pass (translate_vertex_program). --- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index dab593310e..0152bc9056 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -791,19 +791,14 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) if (!hwtemps[j]) break; } - if (j >= c->max_temp_regs) { - rc_error(c, "Too many temporaries\n"); - return; + ta[orig].Allocated = 1; + if (last_inst_src_reladdr && + last_inst_src_reladdr->IP > inst->IP) { + ta[orig].HwTemp = orig; } else { - ta[orig].Allocated = 1; - if (last_inst_src_reladdr && - last_inst_src_reladdr->IP > inst->IP) { - ta[orig].HwTemp = orig; - } else { - ta[orig].HwTemp = j; - } - hwtemps[ta[orig].HwTemp] = 1; + ta[orig].HwTemp = j; } + hwtemps[ta[orig].HwTemp] = 1; } inst->U.I.DstReg.Index = ta[orig].HwTemp; -- cgit v1.2.3 From 2592a8f506568784c5e97bfd7ba306348d6007ed Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 7 Dec 2010 23:34:21 +0100 Subject: r300/compiler: add a function to query program stats (alu, tex, temps..) --- .../drivers/dri/r300/compiler/radeon_compiler.c | 42 ++++++++++++++-------- .../drivers/dri/r300/compiler/radeon_compiler.h | 12 +++++++ 2 files changed, 39 insertions(+), 15 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index b306bd03ae..7cd86fb3ed 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -357,44 +357,54 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) static void reg_count_callback(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { - unsigned int * max_reg = userdata; + int *max_reg = userdata; if (file == RC_FILE_TEMPORARY) - index > *max_reg ? *max_reg = index : 0; + (int)index > *max_reg ? *max_reg = index : 0; } -static void print_stats(struct radeon_compiler * c) +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) { + int max_reg = -1; struct rc_instruction * tmp; - unsigned max_reg, insts, fc, tex, alpha, rgb, presub; - max_reg = insts = fc = tex = alpha = rgb = presub = 0; + memset(s, 0, sizeof(*s)); + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; tmp = tmp->Next){ const struct rc_opcode_info * info; rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); if (tmp->Type == RC_INSTRUCTION_NORMAL) { if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) - presub++; + s->num_presub_ops++; info = rc_get_opcode_info(tmp->U.I.Opcode); } else { if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) - presub++; + s->num_presub_ops++; if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) - presub++; + s->num_presub_ops++; /* Assuming alpha will never be a flow control or * a tex instruction. */ if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) - alpha++; + s->num_alpha_insts++; if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) - rgb++; + s->num_rgb_insts++; info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); } if (info->IsFlowControl) - fc++; + s->num_fc_insts++; if (info->HasTexture) - tex++; - insts++; + s->num_tex_insts++; + s->num_insts++; } - if (insts < 4) + s->num_temp_regs = max_reg + 1; +} + +static void print_stats(struct radeon_compiler * c) +{ + struct rc_program_stats s; + + rc_get_stats(c, &s); + + if (s.num_insts < 4) return; fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" "~%4u Instructions\n" @@ -405,7 +415,9 @@ static void print_stats(struct radeon_compiler * c) "~%4u Presub Operations\n" "~%4u Temporary Registers\n" "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - insts, rgb, alpha, fc, tex, presub, max_reg + 1); + s.num_insts, s.num_rgb_insts, s.num_alpha_insts, + s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, + s.num_temp_regs); } /* Executes a list of compiler passes given in the parameter 'list'. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 31fd469a04..2aa7978010 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -140,6 +140,18 @@ struct radeon_compiler_pass { void *user; /* Optional parameter which is passed to the run function. */ }; +struct rc_program_stats { + unsigned num_insts; + unsigned num_fc_insts; + unsigned num_tex_insts; + unsigned num_rgb_insts; + unsigned num_alpha_insts; + unsigned num_presub_ops; + unsigned num_temp_regs; +}; + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); + /* Executes a list of compiler passes given in the parameter 'list'. */ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, const char *shader_name); -- cgit v1.2.3 From 8fac29d49e2b87c058cb6332211721c979fc231e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 8 Dec 2010 00:18:05 +0100 Subject: r300/compiler: cleanup rc_run_compiler --- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 3 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 3 +- .../drivers/dri/r300/compiler/radeon_compiler.c | 34 +++++++++++++++------- .../drivers/dri/r300/compiler/radeon_compiler.h | 11 +++++-- 4 files changed, 36 insertions(+), 15 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 23671e3cbc..3716deaa7e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -138,9 +138,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {NULL, 0, 0, NULL, NULL} }; + c->Base.type = RC_FRAGMENT_PROGRAM; c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; - rc_run_compiler(&c->Base, fs_list, "Fragment Program"); + rc_run_compiler(&c->Base, fs_list); rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 0152bc9056..54f4cf3316 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -1066,9 +1066,10 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) {NULL, 0, 0, NULL, NULL} }; + c->Base.type = RC_VERTEX_PROGRAM; c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; - rc_run_compiler(&c->Base, vs_list, "Vertex Program"); + rc_run_compiler(&c->Base, vs_list); c->code->InputsRead = c->Base.Program.InputsRead; c->code->OutputsWritten = c->Base.Program.OutputsWritten; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 7cd86fb3ed..72673d75e5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -398,7 +398,7 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) s->num_temp_regs = max_reg + 1; } -static void print_stats(struct radeon_compiler * c) +static void print_stats(struct radeon_compiler * c, const char *shader) { struct rc_program_stats s; @@ -407,6 +407,7 @@ static void print_stats(struct radeon_compiler * c) if (s.num_insts < 4) return; fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + "~ %s:\n" "~%4u Instructions\n" "~%4u Vector Instructions (RGB)\n" "~%4u Scalar Instructions (Alpha)\n" @@ -415,20 +416,19 @@ static void print_stats(struct radeon_compiler * c) "~%4u Presub Operations\n" "~%4u Temporary Registers\n" "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + shader, s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, s.num_temp_regs); } -/* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, - const char *shader_name) -{ - if (c->Debug & RC_DBG_LOG) { - fprintf(stderr, "%s: before compilation\n", shader_name); - rc_print_program(&c->Program); - } +static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { + "Vertex Program", + "Fragment Program" +}; +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ for (unsigned i = 0; list[i].name; i++) { if (list[i].predicate) { list[i].run(c, list[i].user); @@ -437,13 +437,25 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis return; if ((c->Debug & RC_DBG_LOG) && list[i].dump) { - fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name); + fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); rc_print_program(&c->Program); } } } +} + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); + rc_print_program(&c->Program); + } + + rc_run_compiler_passes(c, list); + if (c->Debug & RC_DBG_STATS) - print_stats(c); + print_stats(c, shader_name[c->type]); } void rc_validate_final_shader(struct radeon_compiler *c, void *user) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 2aa7978010..e663339589 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -35,9 +35,16 @@ struct rc_swizzle_caps; +enum rc_program_type { + RC_VERTEX_PROGRAM, + RC_FRAGMENT_PROGRAM, + RC_NUM_PROGRAM_TYPES +}; + struct radeon_compiler { struct memory_pool Pool; struct rc_program Program; + enum rc_program_type type; unsigned Debug:2; unsigned Error:1; char * ErrorMsg; @@ -153,8 +160,8 @@ struct rc_program_stats { void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); /* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, - const char *shader_name); +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list); void rc_validate_final_shader(struct radeon_compiler *c, void *user); #endif /* RADEON_COMPILER_H */ -- cgit v1.2.3 From 95080fb50f24bc96fdbb380ef386ee7b9cc164ec Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 8 Dec 2010 01:58:00 +0100 Subject: r300/compiler: do not print pair/tex/presub program stats for vertex shaders --- .../drivers/dri/r300/compiler/radeon_compiler.c | 46 ++++++++++++++-------- 1 file changed, 30 insertions(+), 16 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 72673d75e5..65548604bc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -398,7 +398,7 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) s->num_temp_regs = max_reg + 1; } -static void print_stats(struct radeon_compiler * c, const char *shader) +static void print_stats(struct radeon_compiler * c) { struct rc_program_stats s; @@ -406,20 +406,34 @@ static void print_stats(struct radeon_compiler * c, const char *shader) if (s.num_insts < 4) return; - fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" - "~ %s:\n" - "~%4u Instructions\n" - "~%4u Vector Instructions (RGB)\n" - "~%4u Scalar Instructions (Alpha)\n" - "~%4u Flow Control Instructions\n" - "~%4u Texture Instructions\n" - "~%4u Presub Operations\n" - "~%4u Temporary Registers\n" - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - shader, - s.num_insts, s.num_rgb_insts, s.num_alpha_insts, - s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, - s.num_temp_regs); + + switch (c->type) { + case RC_VERTEX_PROGRAM: + fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Flow Control Instructions\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_fc_insts, s.num_temp_regs); + break; + + case RC_FRAGMENT_PROGRAM: + fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_rgb_insts, s.num_alpha_insts, + s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, + s.num_temp_regs); + break; + default: + assert(0); + } } static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { @@ -455,7 +469,7 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis rc_run_compiler_passes(c, list); if (c->Debug & RC_DBG_STATS) - print_stats(c, shader_name[c->type]); + print_stats(c); } void rc_validate_final_shader(struct radeon_compiler *c, void *user) -- cgit v1.2.3 From 93f2df07606132519e8eed349ff2f6910e9fbbd6 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 8 Dec 2010 01:59:33 +0100 Subject: r300/compiler: handle DPH and XPD in rc_compute_sources_for_writemask This bug can only be triggered if you put deadcode before native rewrite. --- src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index 113b27632a..25afd272be 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -460,6 +460,7 @@ void rc_compute_sources_for_writemask( srcmasks[1] |= RC_MASK_XY; break; case RC_OPCODE_DP3: + case RC_OPCODE_XPD: srcmasks[0] |= RC_MASK_XYZ; srcmasks[1] |= RC_MASK_XYZ; break; @@ -467,6 +468,10 @@ void rc_compute_sources_for_writemask( srcmasks[0] |= RC_MASK_XYZW; srcmasks[1] |= RC_MASK_XYZW; break; + case RC_OPCODE_DPH: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZW; + break; case RC_OPCODE_TXB: case RC_OPCODE_TXP: srcmasks[0] |= RC_MASK_W; -- cgit v1.2.3 From 2ff9d4474bdf5f05852ad4963d0b597d20743678 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 7 Dec 2010 21:57:18 +0100 Subject: r300/compiler: make lowering passes possibly use up to two less temps CMP may now use two less temps, other non-native instructions may end up using one less temp, except for SIN/COS/SCS, which I am leaving unchanged for now. This may reduce register pressure inside loops, because the register allocator doesn't do a very good job there. --- .../drivers/dri/r300/compiler/radeon_program_alu.c | 149 ++++++++++++--------- 1 file changed, 86 insertions(+), 63 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 01c2e74e7b..58977a40c7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -85,16 +85,6 @@ static struct rc_instruction *emit3( return fpi; } -static struct rc_dst_register dstreg(int file, int index) -{ - struct rc_dst_register dst; - dst.File = file; - dst.Index = index; - dst.WriteMask = RC_MASK_XYZW; - dst.RelAddr = 0; - return dst; -} - static struct rc_dst_register dstregtmpmask(int index, int mask) { struct rc_dst_register dst = {0}; @@ -187,6 +177,38 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg) return swizzle_smear(reg, RC_SWIZZLE_W); } +static int is_dst_safe_to_reuse(struct rc_instruction *inst) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + assert(info->HasDstReg); + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + return 0; + + for (i = 0; i < info->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) + return 0; + } + + return 1; +} + +static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + unsigned tmp; + + if (is_dst_safe_to_reuse(inst)) + tmp = inst->U.I.DstReg.Index; + else + tmp = rc_find_free_temporary(c); + + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); +} + static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -210,10 +232,10 @@ static void transform_CEIL(struct radeon_compiler* c, * ceil(x) = x+frac(-x) */ - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); rc_remove_instruction(inst); } @@ -225,11 +247,11 @@ static void transform_CLAMP(struct radeon_compiler *c, * MIN tmp, src, max * MAX dst, tmp, min */ - int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[1]); + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -275,10 +297,10 @@ static void transform_DST(struct radeon_compiler* c, static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -368,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c, static void transform_LRP(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); } @@ -383,9 +405,8 @@ static void transform_LRP(struct radeon_compiler* c, static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); - struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); tempdst.WriteMask = RC_MASK_W; tempsrc.Swizzle = RC_SWIZZLE_WWWW; @@ -405,11 +426,11 @@ static void transform_RSQ(struct radeon_compiler* c, static void transform_SEQ(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -424,11 +445,11 @@ static void transform_SFL(struct radeon_compiler* c, static void transform_SGE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -436,11 +457,11 @@ static void transform_SGE(struct radeon_compiler* c, static void transform_SGT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -448,11 +469,11 @@ static void transform_SGT(struct radeon_compiler* c, static void transform_SLE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -460,11 +481,11 @@ static void transform_SLE(struct radeon_compiler* c, static void transform_SLT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -472,11 +493,11 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SNE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -490,12 +511,13 @@ static void transform_SSG(struct radeon_compiler* c, * CMP tmp1, x, 1, 0 * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0; + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit3(c, inst->Prev, RC_OPCODE_CMP, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, negate(inst->U.I.SrcReg[0]), builtin_one, builtin_zero); @@ -512,7 +534,7 @@ static void transform_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -534,15 +556,15 @@ static void transform_SWZ(struct radeon_compiler* c, static void transform_XPD(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), - negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -610,7 +632,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, { /* There is no decent CMP available, so let's rig one up. * CMP is defined as dst = src0 < 0.0 ? src1 : src2 - * The following sequence consumes two temps and two extra slots + * The following sequence consumes zero to two temps and two extra slots * (the second temp and the second slot is consumed by transform_LRP), * but should be equivalent: * @@ -618,18 +640,18 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, * LRP dst, tmp0, src1, src2 * * Yes, I know, I'm a mad scientist. ~ C. & M. */ - int tempreg0 = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); /* SLT tmp0, src0, 0.0 */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstreg(RC_FILE_TEMPORARY, tempreg0), + dst, inst->U.I.SrcReg[0], builtin_zero); /* LRP dst, tmp0, src1, src2 */ transform_LRP(c, emit3(c, inst->Prev, RC_OPCODE_LRP, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); rc_remove_instruction(inst); } @@ -660,7 +682,7 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c, static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); unsigned constant_swizzle; int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001, @@ -668,16 +690,16 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, /* MOV dst, src */ emit1(c, inst->Prev, RC_OPCODE_MOV, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[0]); /* MAX dst.z, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, - dstregtmpmask(tempreg, RC_MASK_Y), - srcreg(RC_FILE_TEMPORARY, tempreg), + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); - inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg); + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); } static void transform_r300_vertex_SEQ(struct radeon_compiler *c, @@ -761,12 +783,13 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, * SLT tmp1, x, 0; * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, builtin_zero, inst->U.I.SrcReg[0]); @@ -781,7 +804,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); -- cgit v1.2.3 From c4f9e55d61a75751a202f8028f64893c4425f541 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 8 Dec 2010 04:27:58 +0100 Subject: r300/compiler: remove at least unused immediates if externals cannot be removed --- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 3 +-- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 3 +-- src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c | 8 ++++---- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 3716deaa7e..e0d349b98c 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -81,7 +81,6 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { int is_r500 = c->Base.is_r500; - int kill_consts = c->Base.remove_unused_constants; int opt = !c->Base.disable_optimizations; /* Lists of instruction transformations. */ @@ -121,7 +120,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, {"dataflow optimize", 1, opt, rc_optimize, NULL}, {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, - {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too * many texture indirections.*/ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 54f4cf3316..472029f63d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -1014,7 +1014,6 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) { int is_r500 = c->Base.is_r500; - int kill_consts = c->Base.remove_unused_constants; int opt = !c->Base.disable_optimizations; /* Lists of instruction transformations. */ @@ -1059,7 +1058,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) /* This pass must be done after optimizations. */ {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, {"register allocation", 1, opt, allocate_temporary_registers, NULL}, - {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, 1, translate_vertex_program, NULL}, {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c index 5f67f536f6..7d76585a59 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c @@ -87,8 +87,9 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) rc_for_all_reads_src(inst, mark_used, &d); } - /* Pass 2: If there is relative addressing, mark all externals as used. */ - if (has_rel_addr) { + /* Pass 2: If there is relative addressing or dead constant elimination + * is disabled, mark all externals as used. */ + if (has_rel_addr || !c->remove_unused_constants) { for (unsigned i = 0; i < c->Program.Constants.Count; i++) if (constants[i].Type == RC_CONSTANT_EXTERNAL) const_used[i] = 1; @@ -119,7 +120,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) /* is_identity ==> new_count == old_count * !is_identity ==> new_count < old_count */ assert( is_identity || new_count < c->Program.Constants.Count); - assert(!(has_rel_addr && are_externals_remapped)); + assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); /* Pass 4: Redirect reads of all constants to their new locations. */ if (!is_identity) { @@ -127,7 +128,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) inst != &c->Program.Instructions; inst = inst->Next) { rc_remap_registers(inst, remap_regs, inv_remap_table); } - } /* Set the new constant count. Note that new_count may be less than -- cgit v1.2.3 From 9e1fbd3d6e2a8accd0ebdc5937dd6eb7a8ef9b10 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 11 Dec 2010 07:32:24 +0100 Subject: r300/compiler: fix LIT in VS This fixes broken rendering of trees in ETQW. The trees still disappear for an unknown reason when they are close. Broken since: 2ff9d4474bdf5f05852ad4963d0b597d20743678 r300/compiler: make lowering passes possibly use up to two less temps NOTE: This is a candidate for the 7.10 branch. --- src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 58977a40c7..c8063171b8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -689,11 +689,12 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, &constant_swizzle); /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; emit1(c, inst->Prev, RC_OPCODE_MOV, 0, dst, inst->U.I.SrcReg[0]); - /* MAX dst.z, src, 0.00...001 */ + /* MAX dst.y, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, dstregtmpmask(dst.Index, RC_MASK_Y), srcreg(RC_FILE_TEMPORARY, dst.Index), -- cgit v1.2.3 From d0990db6bd7d6e9e1cd780dc6b50ce00408a3cbc Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 11 Dec 2010 13:22:19 +0100 Subject: r300/compiler: fix swizzle lowering with a presubtract source operand If a source operand has a non-native swizzle (e.g. the KIL instruction cannot have a swizzle other than .xyzw), the lowering pass uses one or more MOV instructions to move the operand to an intermediate temporary with native swizzles. This commit fixes that the presubtract information was lost during the lowering. NOTE: This is a candidate for both the 7.9 and 7.10 branches. --- src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/r300/compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c index a0f7bd8174..133a9f72ec 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -56,6 +56,7 @@ static void rewrite_source(struct radeon_compiler * c, mov->U.I.DstReg.Index = tempreg; mov->U.I.DstReg.WriteMask = split.Phase[phase]; mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + mov->U.I.PreSub = inst->U.I.PreSub; phase_refmask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { -- cgit v1.2.3