diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300')
25 files changed, 1128 insertions, 499 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 8be32ea91f..1db8678e89 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -76,6 +76,9 @@ static void use_temporary(struct r300_fragment_program_code *code, unsigned int static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) { + if (!src.Used) + return 0; + if (src.File == RC_FILE_CONSTANT) { return src.Index | (1 << 5); } else if (src.File == RC_FILE_TEMPORARY) { diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 2d28b06539..05d3da8a10 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -94,6 +94,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) */ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { + const struct swizzle_data* sd; unsigned int relevant; int j; @@ -127,7 +128,8 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return 0; - if (!lookup_native_swizzle(reg.Swizzle)) + sd = lookup_native_swizzle(reg.Swizzle); + if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) return 0; return 1; @@ -201,7 +203,7 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) { const struct swizzle_data* sd = lookup_native_swizzle(swizzle); - if (!sd) { + if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); return 0; } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 2f130198d3..7b9c316794 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -137,7 +137,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too * many texture indirections.*/ - {"register rename", 1, !is_r500, rc_rename_regs, NULL}, + {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, {"pair translate", 1, 1, rc_pair_translate, NULL}, {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, {"register allocation", 1, opt, rc_pair_regalloc, NULL}, diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 6f101c68eb..5da82d90f6 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -45,9 +45,6 @@ #include "radeon_program_pair.h" -#define MAX_BRANCH_DEPTH_FULL 32 -#define MAX_BRANCH_DEPTH_PARTIAL 4 - #define PROG_CODE \ struct r500_fragment_program_code *code = &c->code->code.r500 @@ -200,6 +197,9 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) { + if (!src.Used) + return 0; + if (src.File == RC_FILE_CONSTANT) { return src.Index | 0x100; } else if (src.File == RC_FILE_TEMPORARY) { @@ -506,7 +506,7 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst break; } case RC_OPCODE_IF: - if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { + if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); return; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index cfb6df2cd7..b69e81698a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -34,6 +34,8 @@ #define R500_PFS_MAX_INST 512 #define R500_PFS_NUM_TEMP_REGS 128 #define R500_PFS_NUM_CONST_REGS 256 +#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 +#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index 97f4c75849..2b8d284ce9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -31,6 +31,8 @@ #include "radeon_compiler_util.h" +#include "radeon_compiler.h" +#include "radeon_dataflow.h" /** */ unsigned int rc_swizzle_to_writemask(unsigned int swz) @@ -59,3 +61,123 @@ unsigned int rc_src_reads_dst_mask( } return dst_mask & rc_swizzle_to_writemask(src_swz); } + +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; + + for(chan = 0; chan < channels; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; +} + +unsigned int rc_source_type_mask(unsigned int mask) +{ + unsigned int ret = RC_SOURCE_NONE; + + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; + + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; + + return ret; +} + +struct can_use_presub_data { + struct rc_src_register RemoveSrcs[3]; + unsigned int RGBCount; + unsigned int AlphaCount; +}; + +static void can_use_presub_read_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct can_use_presub_data * d = userdata; + unsigned int src_type = rc_source_type_mask(mask); + unsigned int i; + + if (file == RC_FILE_NONE) + return; + + for(i = 0; i < 3; i++) { + if (d->RemoveSrcs[i].File == file + && d->RemoveSrcs[i].Index == index) { + src_type &= + ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4); + } + } + + if (src_type & RC_SOURCE_RGB) + d->RGBCount++; + + if (src_type & RC_SOURCE_ALPHA) + d->AlphaCount++; +} + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1) +{ + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int presub_src_type = rc_source_type_mask(presub_writemask); + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + + if (presub_op == RC_PRESUB_NONE) { + return 1; + } + + if (info->HasTexture) { + return 0; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. + * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } + + memset(&d, 0, sizeof(d)); + d.RemoveSrcs[0] = replace_reg; + d.RemoveSrcs[1] = presub_src0; + d.RemoveSrcs[2] = presub_src1; + + rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d); + + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + + if ((presub_src_type & RC_SOURCE_RGB) + && d.RGBCount + num_presub_srcs > 3) { + return 0; + } + + if ((presub_src_type & RC_SOURCE_ALPHA) + && d.AlphaCount + num_presub_srcs > 3) { + return 0; + } + + return 1; +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index 1a14e7cb0e..e50dfbd4fb 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -3,6 +3,9 @@ #ifndef RADEON_PROGRAM_UTIL_H #define RADEON_PROGRAM_UTIL_H +struct rc_instruction; +struct rc_src_register; + unsigned int rc_swizzle_to_writemask(unsigned int swz); unsigned int rc_src_reads_dst_mask( @@ -13,4 +16,16 @@ unsigned int rc_src_reads_dst_mask( unsigned int dst_idx, unsigned int dst_mask); +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels); + +unsigned int rc_source_type_mask(unsigned int mask); + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1); + #endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 5927498818..9df07edf2b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -139,7 +139,46 @@ static void pair_sub_for_all_args( const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); for(i = 0; i < info->NumSrcRegs; i++) { - cb(userdata, fullinst, &sub->Arg[i]); + unsigned int src_type; + unsigned int channels = 0; + if (&fullinst->U.P.RGB == sub) + channels = 3; + else if (&fullinst->U.P.Alpha == sub) + channels = 1; + + assert(channels > 0); + src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels); + + if (src_type == RC_SOURCE_NONE) + continue; + + if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { + unsigned int presub_type; + unsigned int presub_src_count; + struct rc_pair_instruction_source * src_array; + unsigned int j; + if (src_type & RC_SOURCE_RGB) { + presub_type = fullinst-> + U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.RGB.Src; + } else { + presub_type = fullinst-> + U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.Alpha.Src; + } + presub_src_count + = rc_presubtract_src_reg_count(presub_type); + for(j = 0; j < presub_src_count; j++) { + cb(userdata, fullinst, &sub->Arg[i], + &src_array[j]); + } + } else { + struct rc_pair_instruction_source * src = + rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); + if (src) { + cb(userdata, fullinst, &sub->Arg[i], src); + } + } } } @@ -430,12 +469,29 @@ static rc_opcode get_flow_control_inst(struct rc_instruction * inst) } +struct branch_write_mask { + unsigned int IfWriteMask:4; + unsigned int ElseWriteMask:4; + unsigned int HasElse:1; +}; + +union get_readers_read_cb { + rc_read_src_fn I; + rc_pair_read_arg_fn P; +}; + struct get_readers_callback_data { struct radeon_compiler * C; struct rc_reader_data * ReaderData; - rc_read_src_fn ReadCB; + rc_read_src_fn ReadNormalCB; + rc_pair_read_arg_fn ReadPairCB; rc_read_write_mask_fn WriteCB; + rc_register_file DstFile; + unsigned int DstIndex; + unsigned int DstMask; unsigned int AliveWriteMask; + /* For convenience, this is indexed starting at 1 */ + struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; }; static void add_reader( @@ -443,7 +499,7 @@ static void add_reader( struct rc_reader_data * data, struct rc_instruction * inst, unsigned int mask, - struct rc_src_register * src) + void * arg_or_src) { struct rc_reader * new; memory_pool_array_reserve(pool, struct rc_reader, data->Readers, @@ -451,7 +507,74 @@ static void add_reader( new = &data->Readers[data->ReaderCount++]; new->Inst = inst; new->WriteMask = mask; - new->Src = src; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + new->U.Src = arg_or_src; + } else { + new->U.Arg = arg_or_src; + } +} + +static unsigned int get_readers_read_callback( + struct get_readers_callback_data * cb_data, + unsigned int has_rel_addr, + rc_register_file file, + unsigned int index, + unsigned int swizzle) +{ + unsigned int shared_mask, read_mask; + + if (has_rel_addr) { + cb_data->ReaderData->Abort = 1; + return RC_MASK_NONE; + } + + shared_mask = rc_src_reads_dst_mask(file, index, swizzle, + cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); + + if (shared_mask == RC_MASK_NONE) + return shared_mask; + + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ + + read_mask = rc_swizzle_to_writemask(swizzle); + if (cb_data->ReaderData->AbortOnRead & read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + /* XXX The behavior in this case should be configurable. */ + if ((read_mask & cb_data->AliveWriteMask) != read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + return shared_mask; +} + +static void get_readers_pair_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int shared_mask; + struct get_readers_callback_data * d = userdata; + + shared_mask = get_readers_read_callback(d, + 0 /*Pair Instructions don't use RelAddr*/, + src->File, src->Index, arg->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + + if (d->ReadPairCB) + d->ReadPairCB(d->ReaderData, inst, arg, src); + + if (d->ReaderData->Abort) + return; + + add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, arg); } /** @@ -464,36 +587,18 @@ static void get_readers_normal_read_callback( struct rc_src_register * src) { struct get_readers_callback_data * d = userdata; - unsigned int read_mask; - - if (src->RelAddr) - d->ReaderData->Abort = 1; + unsigned int shared_mask; - unsigned int shared_mask = rc_src_reads_dst_mask(src->File, src->Index, - src->Swizzle, - d->ReaderData->Writer->U.I.DstReg.File, - d->ReaderData->Writer->U.I.DstReg.Index, - d->AliveWriteMask); + shared_mask = get_readers_read_callback(d, + src->RelAddr, src->File, src->Index, src->Swizzle); if (shared_mask == RC_MASK_NONE) return; + /* The callback function could potentially clear d->ReaderData->Abort, + * so we need to call it before we return. */ + if (d->ReadNormalCB) + d->ReadNormalCB(d->ReaderData, inst, src); - /* If we make it this far, it means that this source reads from the - * same register written to by d->ReaderData->Writer. */ - - if (d->ReaderData->AbortOnRead) { - d->ReaderData->Abort = 1; - return; - } - - read_mask = rc_swizzle_to_writemask(src->Swizzle); - /* XXX The behavior in this case should be configurable. */ - if ((read_mask & d->AliveWriteMask) != read_mask) { - d->ReaderData->Abort = 1; - return; - } - - d->ReadCB(d->ReaderData, inst, src); if (d->ReaderData->Abort) return; @@ -514,29 +619,132 @@ static void get_readers_write_callback( { struct get_readers_callback_data * d = userdata; - if (index == d->ReaderData->Writer->U.I.DstReg.Index - && file == d->ReaderData->Writer->U.I.DstReg.File) { - unsigned int shared_mask = mask - & d->ReaderData->Writer->U.I.DstReg.WriteMask; - if (d->ReaderData->InElse) { - if (shared_mask & d->AliveWriteMask) { - /* We set AbortOnRead here because the - * destination register of d->ReaderData->Writer - * is written to in both the IF and the - * ELSE block of this IF/ELSE statement. - * This means that readers of this - * destination register that follow this IF/ELSE - * statement use the value of different - * instructions depending on the control flow - * decisions made by the program. */ - d->ReaderData->AbortOnRead = 1; + if (index == d->DstIndex && file == d->DstFile) { + unsigned int shared_mask = mask & d->DstMask; + d->ReaderData->AbortOnRead &= ~shared_mask; + d->AliveWriteMask &= ~shared_mask; + } + + if(d->WriteCB) + d->WriteCB(d->ReaderData, inst, file, index, mask); +} + +static void get_readers_for_single_write( + void * userdata, + struct rc_instruction * writer, + rc_register_file dst_file, + unsigned int dst_index, + unsigned int dst_mask) +{ + struct rc_instruction * tmp; + unsigned int branch_depth = 0; + struct get_readers_callback_data * d = userdata; + + d->ReaderData->Writer = writer; + d->ReaderData->AbortOnRead = 0; + d->ReaderData->InElse = 0; + d->DstFile = dst_file; + d->DstIndex = dst_index; + d->DstMask = dst_mask; + d->AliveWriteMask = dst_mask; + memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); + + if (!dst_mask) + return; + + for(tmp = writer->Next; tmp != &d->C->Program.Instructions; + tmp = tmp->Next){ + rc_opcode opcode = get_flow_control_inst(tmp); + switch(opcode) { + case RC_OPCODE_BGNLOOP: + /* XXX We can do better when we see a BGNLOOP if we + * add a flag called AbortOnWrite to struct + * rc_reader_data and leave it set until the next + * ENDLOOP. */ + case RC_OPCODE_ENDLOOP: + /* XXX We can do better when we see an ENDLOOP by + * searching backwards from writer and looking for + * readers of writer's destination index. If we find a + * reader before we get to the BGNLOOP, we must abort + * unless there is another writer between that reader + * and the BGNLOOP. */ + case RC_OPCODE_BRK: + case RC_OPCODE_CONT: + d->ReaderData->Abort = 1; + return; + case RC_OPCODE_IF: + branch_depth++; + if (branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + return; + } + d->BranchMasks[branch_depth].IfWriteMask = + d->AliveWriteMask; + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) { + d->ReaderData->InElse = 1; + } else { + unsigned int temp_mask = d->AliveWriteMask; + d->AliveWriteMask = + d->BranchMasks[branch_depth].IfWriteMask; + d->BranchMasks[branch_depth].ElseWriteMask = + temp_mask; + d->BranchMasks[branch_depth].HasElse = 1; + } + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + d->ReaderData->AbortOnRead = d->AliveWriteMask; + d->ReaderData->InElse = 0; + } + else { + struct branch_write_mask * masks = + &d->BranchMasks[branch_depth]; + + if (masks->HasElse) { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask + & ~masks->ElseWriteMask; + d->AliveWriteMask = masks->IfWriteMask + ^ ((masks->IfWriteMask ^ + masks->ElseWriteMask) + & (masks->IfWriteMask + ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask + & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + + } + memset(masks, 0, + sizeof(struct branch_write_mask)); + branch_depth--; } + break; + default: + break; + } + + if (d->ReaderData->InElse) + continue; + + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, + get_readers_normal_read_callback, d); } else { - d->AliveWriteMask &= ~shared_mask; + rc_pair_for_all_reads_arg(tmp, + get_readers_pair_read_callback, d); } - } + rc_for_all_writes_mask(tmp, get_readers_write_callback, d); + + if (d->ReaderData->Abort) + return; - d->WriteCB(d->ReaderData, inst, file, index, mask); + if (branch_depth == 0 && !d->AliveWriteMask) + return; + } } /** @@ -577,80 +785,26 @@ static void get_readers_write_callback( * @param write_cb This function will be called for every instruction after * writer. */ -void rc_get_readers_normal( +void rc_get_readers( struct radeon_compiler * c, struct rc_instruction * writer, struct rc_reader_data * data, - rc_read_src_fn read_cb, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb) { - struct rc_instruction * tmp; struct get_readers_callback_data d; - unsigned int branch_depth = 0; - data->Writer = writer; data->Abort = 0; - data->AbortOnRead = 0; - data->InElse = 0; data->ReaderCount = 0; data->ReadersReserved = 0; data->Readers = NULL; d.C = c; - d.AliveWriteMask = writer->U.I.DstReg.WriteMask; d.ReaderData = data; - d.ReadCB = read_cb; + d.ReadNormalCB = read_normal_cb; + d.ReadPairCB = read_pair_cb; d.WriteCB = write_cb; - if (!writer->U.I.DstReg.WriteMask) - return; - - for(tmp = writer->Next; tmp != &c->Program.Instructions; - tmp = tmp->Next){ - rc_opcode opcode = get_flow_control_inst(tmp); - switch(opcode) { - case RC_OPCODE_BGNLOOP: - /* XXX We can do better when we see a BGNLOOP if we - * add a flag called AbortOnWrite to struct - * rc_reader_data and leave it set until the next - * ENDLOOP. */ - case RC_OPCODE_ENDLOOP: - /* XXX We can do better when we see an ENDLOOP by - * searching backwards from writer and looking for - * readers of writer's destination index. If we find a - * reader before we get to the BGNLOOP, we must abort - * unless there is another writer between that reader - * and the BGNLOOP. */ - data->Abort = 1; - return; - case RC_OPCODE_IF: - branch_depth++; - break; - case RC_OPCODE_ELSE: - if (branch_depth == 0) - data->InElse = 1; - break; - case RC_OPCODE_ENDIF: - if (branch_depth == 0) { - data->AbortOnRead = 1; - data->InElse = 0; - } - else { - branch_depth--; - } - break; - default: - break; - } - - if (!data->InElse) - rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d); - rc_for_all_writes_mask(tmp, get_readers_write_callback, &d); - - if (data->Abort) - return; - - if (!d.AliveWriteMask) - return; - } + rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 7de6b98f76..ef971c5b23 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -36,6 +36,7 @@ struct rc_instruction; struct rc_swizzle_caps; struct rc_src_register; struct rc_pair_instruction_arg; +struct rc_pair_instruction_source; struct rc_compiler; @@ -59,7 +60,8 @@ void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, void * userdata); typedef void (*rc_pair_read_arg_fn)(void * userdata, - struct rc_instruction * inst, struct rc_pair_instruction_arg * arg); + struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src); void rc_pair_for_all_reads_arg(struct rc_instruction * inst, rc_pair_read_arg_fn cb, void * userdata); @@ -71,7 +73,10 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v struct rc_reader { struct rc_instruction * Inst; unsigned int WriteMask; - struct rc_src_register * Src; + union { + struct rc_src_register * Src; + struct rc_pair_instruction_arg * Arg; + } U; }; struct rc_reader_data { @@ -87,14 +92,13 @@ struct rc_reader_data { void * CbData; }; -void rc_get_readers_normal( +void rc_get_readers( struct radeon_compiler * c, - struct rc_instruction * inst, + struct rc_instruction * writer, struct rc_reader_data * data, - /*XXX: These should be their own function types. */ - rc_read_src_fn read_cb, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb); - /** * Compiler passes based on dataflow analysis. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 4d9120ffd0..27b10ffbd6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -32,15 +32,14 @@ #include "radeon_compiler_util.h" #include "radeon_swizzle.h" -struct peephole_state { - struct rc_instruction * Inst; - /** Stores a bitmask of the components that are still "alive" (i.e. - * they have not been written to since Inst was executed.) - */ - unsigned int WriteMask; +struct src_clobbered_reads_cb_data { + rc_register_file File; + unsigned int Index; + unsigned int Mask; + struct rc_reader_data * ReaderData; }; -typedef void (*rc_presub_replace_fn)(struct peephole_state *, +typedef void (*rc_presub_replace_fn)(struct rc_instruction *, struct rc_instruction *, unsigned int); @@ -67,38 +66,18 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct return combine; } -struct copy_propagate_state { - struct radeon_compiler * C; - struct rc_instruction * Mov; - unsigned int Conflict:1; - - /** Whether Mov's source has been clobbered */ - unsigned int SourceClobbered:1; - - /** Which components of Mov's destination register are still from that Mov? */ - unsigned int MovMask:4; - - /** Which components of Mov's destination register are clearly *not* from that Mov */ - unsigned int DefinedMask:4; - - /** Which components of Mov's source register are sourced */ - unsigned int SourcedMask:4; - - /** Branch depth beyond Mov; negative value indicates we left the Mov's block */ - int BranchDepth; -}; - static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, struct rc_src_register * src) { rc_register_file file = src->File; struct rc_reader_data * reader_data = data; - const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - /* It is possible to do copy propigation in this situation, - * just not right now, see peephole_add_presub_inv() */ - if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE && - (info->NumSrcRegs > 2 || info->HasTexture)) { + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + *src, + reader_data->Writer->U.I.PreSub.SrcReg[0], + reader_data->Writer->U.I.PreSub.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -123,23 +102,44 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, } } -static void copy_propagate_scan_write(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) +static void src_clobbered_reads_cb( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) { - struct rc_reader_data * reader_data = data; - struct copy_propagate_state * s = reader_data->CbData; + struct src_clobbered_reads_cb_data * sc_data = data; + + if (src->File == sc_data->File + && src->Index == sc_data->Index + && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { - if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) { - if (mask & s->SourcedMask) - reader_data->AbortOnRead = 1; - } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { - reader_data->AbortOnRead = 1; + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; } + + if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } +} + +static void is_src_clobbered_scan_write( + void * data, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct src_clobbered_reads_cb_data sc_data; + struct rc_reader_data * reader_data = data; + sc_data.File = file; + sc_data.Index = index; + sc_data.Mask = mask; + sc_data.ReaderData = reader_data; + rc_for_all_reads_src(reader_data->Writer, + src_clobbered_reads_cb, &sc_data); } static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) { - struct copy_propagate_state s; struct rc_reader_data reader_data; unsigned int i; @@ -149,22 +149,10 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i inst_mov->U.I.SaturateMode) return; - memset(&s, 0, sizeof(s)); - s.C = c; - s.Mov = inst_mov; - s.MovMask = inst_mov->U.I.DstReg.WriteMask; - s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; - - reader_data.CbData = &s; - - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); - s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; - } - /* Get a list of all the readers of this MOV instruction. */ - rc_get_readers_normal(c, inst_mov, &reader_data, - copy_propagate_scan_read, copy_propagate_scan_write); + rc_get_readers(c, inst_mov, &reader_data, + copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); if (reader_data.Abort || reader_data.ReaderCount == 0) return; @@ -172,10 +160,10 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i /* Propagate the MOV instruction. */ for (i = 0; i < reader_data.ReaderCount; i++) { struct rc_instruction * inst = reader_data.Readers[i].Inst; - *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]); + *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]); - if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) - inst->U.I.PreSub = s.Mov->U.I.PreSub; + if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = inst_mov->U.I.PreSub; } /* Finally, remove the original MOV instruction */ @@ -222,8 +210,8 @@ static int is_src_uniform_constant(struct rc_src_register src, static void constant_folding_mad(struct rc_instruction * inst) { - rc_swizzle swz; - unsigned int negate; + rc_swizzle swz = 0; + unsigned int negate= 0; if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { if (swz == RC_SWIZZLE_ZERO) { @@ -263,8 +251,8 @@ static void constant_folding_mad(struct rc_instruction * inst) static void constant_folding_mul(struct rc_instruction * inst) { - rc_swizzle swz; - unsigned int negate; + rc_swizzle swz = 0; + unsigned int negate = 0; if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { if (swz == RC_SWIZZLE_ONE) { @@ -296,8 +284,8 @@ static void constant_folding_mul(struct rc_instruction * inst) static void constant_folding_add(struct rc_instruction * inst) { - rc_swizzle swz; - unsigned int negate; + rc_swizzle swz = 0; + unsigned int negate = 0; if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { if (swz == RC_SWIZZLE_ZERO) { @@ -431,132 +419,88 @@ static int src_has_const_swz(struct rc_src_register src) { return 0; } -static void peephole_scan_write(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) +static void presub_scan_read( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) { - struct peephole_state * s = data; - if(s->Inst->U.I.DstReg.File == file - && s->Inst->U.I.DstReg.Index == index) { - unsigned int common_mask = s->WriteMask & mask; - s->WriteMask &= ~common_mask; + struct rc_reader_data * reader_data = data; + rc_presubtract_op * presub_opcode = reader_data->CbData; + + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + *src, + reader_data->Writer->U.I.SrcReg[0], + reader_data->Writer->U.I.SrcReg[1])) { + reader_data->Abort = 1; + return; } } static int presub_helper( struct radeon_compiler * c, - struct peephole_state * s, + struct rc_instruction * inst_add, rc_presubtract_op presub_opcode, rc_presub_replace_fn presub_replace) { - struct rc_instruction * inst; - unsigned int can_remove = 0; - unsigned int cant_sub = 0; - - for(inst = s->Inst->Next; inst != &c->Program.Instructions; - inst = inst->Next) { - unsigned int i; - unsigned char can_use_presub = 1; - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - /* XXX: There are some situations where instructions - * with more than 2 src registers can use the - * presubtract select, but to keep things simple we - * will disable presubtract on these instructions for - * now. */ - if (info->NumSrcRegs > 2 || info->HasTexture) { - can_use_presub = 0; - } + struct rc_reader_data reader_data; + unsigned int i; + rc_presubtract_op cb_op = presub_opcode; - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. */ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { - if (inst->U.I.PreSub.Opcode != presub_opcode - || inst->U.I.PreSub.SrcReg[0].File != - s->Inst->U.I.SrcReg[1].File - || inst->U.I.PreSub.SrcReg[0].Index != - s->Inst->U.I.SrcReg[1].Index) { - can_use_presub = 0; - } - } + reader_data.CbData = &cb_op; + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, + is_src_clobbered_scan_write); - /* Even if the instruction can't use a presubtract operation - * we still need to check if the instruction reads from - * s->Inst->U.I.DstReg, because if it does we must not - * remove s->Inst. */ - for(i = 0; i < info->NumSrcRegs; i++) { - unsigned int mask = src_reads_dst_mask( - inst->U.I.SrcReg[i], s->Inst->U.I.DstReg); - /* XXX We could be more aggressive here using - * presubtract. It is okay if SrcReg[i] only reads - * from some of the mask components. */ - if(s->Inst->U.I.DstReg.WriteMask != mask) { - if (s->Inst->U.I.DstReg.WriteMask & mask) { - can_remove = 0; - break; - } else { - continue; - } - } - if (cant_sub || !can_use_presub) { - can_remove = 0; - break; - } - presub_replace(s, inst, i); - can_remove = 1; - } - if(!can_remove) - break; - rc_for_all_writes_mask(inst, peephole_scan_write, s); - /* If all components of inst_add's destination register have - * been written to by subsequent instructions, the original - * value of the destination register is no longer valid and - * we can't keep doing substitutions. */ - if (!s->WriteMask){ - break; - } - /* Make this instruction doesn't write to the presubtract source. */ - if (inst->U.I.DstReg.WriteMask & - src_reads_dst_mask(s->Inst->U.I.SrcReg[1], - inst->U.I.DstReg) - || src_reads_dst_mask(s->Inst->U.I.SrcReg[0], - inst->U.I.DstReg) - || info->IsFlowControl) { - cant_sub = 1; + if (reader_data.Abort || reader_data.ReaderCount == 0) + return 0; + + for(i = 0; i < reader_data.ReaderCount; i++) { + unsigned int src_index; + struct rc_reader reader = reader_data.Readers[i]; + const struct rc_opcode_info * info = + rc_get_opcode_info(reader.Inst->U.I.Opcode); + + for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src) + presub_replace(inst_add, reader.Inst, src_index); } } - return can_remove; + return 1; } -/* This function assumes that s->Inst->U.I.SrcReg[0] and - * s->Inst->U.I.SrcReg[1] aren't both negative. */ -static void presub_replace_add(struct peephole_state *s, - struct rc_instruction * inst, - unsigned int src_index) +/* This function assumes that inst_add->U.I.SrcReg[0] and + * inst_add->U.I.SrcReg[1] aren't both negative. */ +static void presub_replace_add( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) { rc_presubtract_op presub_opcode; - if (s->Inst->U.I.SrcReg[1].Negate || s->Inst->U.I.SrcReg[0].Negate) + if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) presub_opcode = RC_PRESUB_SUB; else presub_opcode = RC_PRESUB_ADD; - if (s->Inst->U.I.SrcReg[1].Negate) { - inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; - inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[0]; + if (inst_add->U.I.SrcReg[1].Negate) { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; } else { - inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0]; - inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; } - inst->U.I.PreSub.SrcReg[0].Negate = 0; - inst->U.I.PreSub.SrcReg[1].Negate = 0; - inst->U.I.PreSub.Opcode = presub_opcode; - inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], - inst->U.I.PreSub.SrcReg[0]); - inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; - inst->U.I.SrcReg[src_index].Index = presub_opcode; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; + inst_reader->U.I.PreSub.Opcode = presub_opcode; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; } -static int is_presub_candidate(struct rc_instruction * inst) +static int is_presub_candidate( + struct radeon_compiler * c, + struct rc_instruction * inst) { const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); unsigned int i; @@ -565,7 +509,12 @@ static int is_presub_candidate(struct rc_instruction * inst) return 0; for(i = 0; i < info->NumSrcRegs; i++) { - if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg)) + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; + + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) return 0; } return 1; @@ -578,9 +527,8 @@ static int peephole_add_presub_add( struct rc_src_register * src0 = NULL; struct rc_src_register * src1 = NULL; unsigned int i; - struct peephole_state s; - if (!is_presub_candidate(inst_add)) + if (!is_presub_candidate(c, inst_add)) return 0; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) @@ -604,30 +552,28 @@ static int peephole_add_presub_add( if (!src1) return 0; - s.Inst = inst_add; - s.WriteMask = inst_add->U.I.DstReg.WriteMask; - if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) { + if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { rc_remove_instruction(inst_add); return 1; } return 0; } -static void presub_replace_inv(struct peephole_state * s, - struct rc_instruction * inst, - unsigned int src_index) +static void presub_replace_inv( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) { - /* We must be careful not to modify s->Inst, since it - * is possible it will remain part of the program. - * XXX Maybe pass a struct instead of a pointer for s->Inst.*/ - inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; - inst->U.I.PreSub.SrcReg[0].Negate = 0; - inst->U.I.PreSub.Opcode = RC_PRESUB_INV; - inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], - inst->U.I.PreSub.SrcReg[0]); - - inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; - inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; + /* We must be careful not to modify inst_add, since it + * is possible it will remain part of the program.*/ + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; } /** @@ -645,9 +591,8 @@ static int peephole_add_presub_inv( struct rc_instruction * inst_add) { unsigned int i, swz, mask; - struct peephole_state s; - if (!is_presub_candidate(inst_add)) + if (!is_presub_candidate(c, inst_add)) return 0; mask = inst_add->U.I.DstReg.WriteMask; @@ -674,11 +619,7 @@ static int peephole_add_presub_inv( return 0; } - /* Setup the peephole_state information. */ - s.Inst = inst_add; - s.WriteMask = inst_add->U.I.DstReg.WriteMask; - - if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) { + if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { rc_remove_instruction(inst_add); return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index 91524f5ec6..3f880c88fa 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -66,10 +66,13 @@ struct regalloc_state { struct hardware_register * HwTemporary; unsigned int NumHwTemporaries; /** - * If an instruction is inside of a loop, end_loop will be the - * IP of the ENDLOOP instruction, otherwise end_loop will be 0 + * If an instruction is inside of a loop, EndLoop will be the + * IP of the ENDLOOP instruction, and BeginLoop will be the IP + * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop + * will be -1. */ - int end_loop; + int EndLoop; + int BeginLoop; }; static void print_live_intervals(struct live_intervals * src) @@ -180,11 +183,13 @@ static void scan_callback(void * data, struct rc_instruction * inst, reg->Used = 1; if (file == RC_FILE_INPUT) reg->Live.Start = -1; + else if (s->BeginLoop >= 0) + reg->Live.Start = s->BeginLoop; else reg->Live.Start = inst->IP; reg->Live.End = inst->IP; - } else if (s->end_loop) - reg->Live.End = s->end_loop; + } else if (s->EndLoop >= 0) + reg->Live.End = s->EndLoop; else if (inst->IP > reg->Live.End) reg->Live.End = inst->IP; } @@ -195,6 +200,8 @@ static void compute_live_intervals(struct radeon_compiler *c, memset(s, 0, sizeof(*s)); s->C = c; s->NumHwTemporaries = c->max_temp_regs; + s->BeginLoop = -1; + s->EndLoop = -1; s->HwTemporary = memory_pool_malloc(&c->Pool, s->NumHwTemporaries * sizeof(struct hardware_register)); @@ -207,8 +214,10 @@ static void compute_live_intervals(struct radeon_compiler *c, inst = inst->Next) { /* For all instructions inside of a loop, the ENDLOOP - * instruction is used as the end of the live interval. */ - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) { + * instruction is used as the end of the live interval and + * the BGNLOOP instruction is used as the beginning. */ + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { + s->BeginLoop = inst->IP; int loops = 1; struct rc_instruction * tmp; for(tmp = inst->Next; @@ -219,15 +228,17 @@ static void compute_live_intervals(struct radeon_compiler *c, } else if (tmp->U.I.Opcode == RC_OPCODE_ENDLOOP) { if(!--loops) { - s->end_loop = tmp->IP; + s->EndLoop = tmp->IP; break; } } } } - if (inst->IP == s->end_loop) - s->end_loop = 0; + if (inst->IP == s->EndLoop) { + s->EndLoop = -1; + s->BeginLoop = -1; + } rc_for_all_reads_mask(inst, scan_callback, s); rc_for_all_writes_mask(inst, scan_callback, s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index d4a38607d9..cbb5ef6237 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -30,6 +30,7 @@ #include <stdio.h> #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" @@ -54,6 +55,11 @@ struct schedule_instruction { * this instruction can be scheduled. */ unsigned int NumDependencies:5; + + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; }; @@ -94,6 +100,16 @@ struct register_state { struct reg_value * Values[4]; }; +struct remap_reg { + struct rc_instruciont * Inst; + unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int OldSwizzle:3; + unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int NewSwizzle:3; + unsigned int OnlyTexReads:1; + struct remap_reg * Next; +}; + struct schedule_state { struct radeon_compiler * C; struct schedule_instruction * Current; @@ -126,15 +142,6 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s, return &s->Temporary[index].Values[chan]; } -static struct reg_value * get_reg_value(struct schedule_state * s, - rc_register_file file, unsigned int index, unsigned int chan) -{ - struct reg_value ** pv = get_reg_valuep(s, file, index, chan); - if (!pv) - return 0; - return *pv; -} - static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) { inst->NextReady = *list; @@ -290,16 +297,17 @@ static int merge_presub_sources( { unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; struct rc_pair_sub_instruction * dst_sub; + const struct rc_opcode_info * info; assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); switch(type) { - case RC_PAIR_SOURCE_RGB: + case RC_SOURCE_RGB: is_rgb = 1; is_alpha = 0; dst_sub = &dst_full->RGB; break; - case RC_PAIR_SOURCE_ALPHA: + case RC_SOURCE_ALPHA: is_rgb = 0; is_alpha = 1; dst_sub = &dst_full->Alpha; @@ -309,8 +317,8 @@ static int merge_presub_sources( return 0; } - const struct rc_opcode_info * info = - rc_get_opcode_info(dst_full->RGB.Opcode); + info = rc_get_opcode_info(dst_full->RGB.Opcode); + if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) return 0; @@ -340,6 +348,8 @@ static int merge_presub_sources( continue; free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; one_way = 1; } else { dst_sub->Src[free_source] = temp; @@ -355,11 +365,11 @@ static int merge_presub_sources( for(arg = 0; arg < info->NumSrcRegs; arg++) { /*If this arg does not read from an rgb source, * do nothing. */ - if (!(rc_source_type_that_arg_reads( - dst_full->RGB.Arg[arg].Source, - dst_full->RGB.Arg[arg].Swizzle) & type)) { + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle, + 3) & type)) { continue; } + if (dst_full->RGB.Arg[arg].Source == srcp_src) dst_full->RGB.Arg[arg].Source = free_source; /* We need to do this just in case register @@ -391,13 +401,13 @@ static int destructive_merge_instructions( /* Merge the rgb presubtract registers. */ if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { return 0; } } /* Merge the alpha presubtract registers */ if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){ + if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ return 0; } } @@ -524,6 +534,222 @@ static void presub_nop(struct rc_instruction * emitted) { } } } + +static void rgb_to_alpha_remap ( + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + rc_register_file old_file, + rc_swizzle old_swz, + unsigned int new_index) +{ + int new_src_index; + unsigned int i; + struct rc_pair_instruction_source * old_src = + rc_pair_get_src(&inst->U.P, arg); + if (!old_src) { + return; + } + + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + memset(old_src, 0, sizeof(struct rc_pair_instruction_source)); + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, + old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. */ + if (new_src_index < 0) { + assert(0); + return; + } + + arg->Source = new_src_index; +} + +static int can_remap(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static int can_convert_opcode_to_alpha(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DPH: + return 0; + default: + return 1; + } +} + +static void is_rgb_to_alpha_possible( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int chan_count = 0; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data * reader_data = userdata; + + if (!can_remap(inst->U.P.RGB.Opcode) + || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } + + if (!src) + return; + + /* XXX There are some cases where we can still do the conversion if + * a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } + + /* Make sure the source only reads from one component. + * XXX We should allow the source to read from the same component twice. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch(swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + chan_count++; + break; + default: + break; + } + } + if (chan_count > 1) { + reader_data->Abort = 1; + return; + } + + /* Make sure there are enough alpha sources. + * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the subsitution, even if all 3 alpha sources are being used.*/ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } +} + +static int convert_rgb_to_alpha( + struct schedule_state * s, + struct schedule_instruction * sched_inst) +{ + struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info * info = + rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; + + if (sched_inst->GlobalReaders.Abort) + return 0; + + if (!pair_inst->RGB.WriteMask) + return 0; + + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) + || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } + + assert(sched_inst->NumWriteValues == 1); + + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } + + /* We start at the old index, because if we can reuse the same + * register and just change the swizzle then it is more likely we + * will be able to convert all the readers. */ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value ** new_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value ** old_regvalp = + get_reg_valuep(s, + RC_FILE_TEMPORARY, + pair_inst->RGB.DestIndex, + rc_mask_to_swz(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + *old_regvalp = NULL; + new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + break; + } + } + if (new_index < 0) { + return 0; + } + + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = 1; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = swz; + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + + for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(reader.Inst, reader.U.Arg, + RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; +} + /** * Find a good ALU instruction or pair of ALU instruction and emit it. * @@ -535,24 +761,16 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor { struct schedule_instruction * sinst; - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - if (s->ReadyFullALU) { - sinst = s->ReadyFullALU; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - sinst = s->ReadyAlpha; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); } else { struct schedule_instruction **prgb; struct schedule_instruction **palpha; - + struct schedule_instruction *prev; +pair: /* Some pairings might fail because they require too * many source slots; try all possible pairings if necessary */ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { @@ -571,10 +789,43 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor goto success; } } - - /* No success in pairing; just take the first RGB instruction */ - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; + prev = NULL; + /* No success in pairing, now try to convert one of the RGB + * instructions to an Alpha so we can pair it with another RGB. + */ + if (s->ReadyRGB && s->ReadyRGB->NextReady) { + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + if ((*prgb)->NumWriteValues == 1) { + struct schedule_instruction * prgb_next; + if (!convert_rgb_to_alpha(s, *prgb)) + goto cont_loop; + prgb_next = (*prgb)->NextReady; + /* Add instruction to the Alpha ready list. */ + (*prgb)->NextReady = s->ReadyAlpha; + s->ReadyAlpha = *prgb; + /* Remove instruction from the RGB ready list.*/ + if (prev) + prev->NextReady = prgb_next; + else + s->ReadyRGB = prgb_next; + goto pair; + } +cont_loop: + prev = *prgb; + } + } + /* Still no success in pairing, just take the first RGB + * or alpha instruction. */ + if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else if (s->ReadyAlpha) { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } else { + /*XXX Something real bad has happened. */ + assert(0); + } rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); @@ -590,13 +841,13 @@ static void scan_read(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan) { struct schedule_state * s = data; - struct reg_value * v = get_reg_value(s, file, index, chan); + struct reg_value ** v = get_reg_valuep(s, file, index, chan); struct reg_value_reader * reader; if (!v) return; - if (v->Writer == s->Current) { + if (*v && (*v)->Writer == s->Current) { /* The instruction reads and writes to a register component. * In this case, we only want to increment dependencies by one. */ @@ -607,16 +858,28 @@ static void scan_read(void * data, struct rc_instruction * inst, reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); reader->Reader = s->Current; - reader->Next = v->Readers; - v->Readers = reader; - v->NumReaders++; - - s->Current->NumDependencies++; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. */ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. */ + if ((*v)->Writer) { + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; if (s->Current->NumReadValues >= 12) { rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); } else { - s->Current->ReadValues[s->Current->NumReadValues++] = v; + s->Current->ReadValues[s->Current->NumReadValues++] = *v; } } @@ -651,6 +914,16 @@ static void scan_write(void * data, struct rc_instruction * inst, } } +static void is_rgb_to_alpha_possible_normal( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = userdata; + reader_data->Abort = 1; + +} + static void schedule_block(struct r300_fragment_program_compiler * c, struct rc_instruction * begin, struct rc_instruction * end) { @@ -682,6 +955,11 @@ static void schedule_block(struct r300_fragment_program_compiler * c, if (!s.Current->NumDependencies) instruction_ready(&s, s.Current); + + /* Get global readers for possible RGB->Alpha conversion. */ + rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); } /* Temporarily unlink all instructions */ @@ -710,8 +988,13 @@ static int is_controlflow(struct rc_instruction * inst) void rc_pair_schedule(struct radeon_compiler *cc, void *user) { + struct schedule_state s; + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; struct rc_instruction * inst = c->Base.Program.Instructions.Next; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; while(inst != &c->Base.Program.Instructions) { struct rc_instruction * first; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index c549be5218..fc05366f50 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -280,9 +280,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, pair->RGB.DestIndex = inst->DstReg.Index; pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; } + if (needalpha) { - pair->Alpha.DestIndex = inst->DstReg.Index; pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + if (pair->Alpha.WriteMask) { + pair->Alpha.DestIndex = inst->DstReg.Index; + } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index 24b685fbeb..d7bedc5729 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -30,6 +30,7 @@ #include <stdio.h> #include "radeon_compiler.h" +#include "radeon_dataflow.h" /** @@ -91,37 +92,98 @@ struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register return tmp; } -unsigned int rc_find_free_temporary(struct radeon_compiler * c) +struct get_used_temporaries_data { + unsigned char * Used; + unsigned int UsedLength; +}; + +static void get_used_temporaries_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) { - char used[RC_REGISTER_MAX_INDEX]; - unsigned int i; - struct rc_instruction * rcinst; + struct get_used_temporaries_data * d = userdata; - memset(used, 0, sizeof(used)); + if (file != RC_FILE_TEMPORARY) + return; - for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { - const struct rc_sub_instruction *inst = &rcinst->U.I; - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); - unsigned int k; + if (index >= d->UsedLength) + return; - for (k = 0; k < opcode->NumSrcRegs; k++) { - if (inst->SrcReg[k].File == RC_FILE_TEMPORARY) - used[inst->SrcReg[k].Index] = 1; - } + d->Used[index] |= mask; +} - if (opcode->HasDstReg) { - if (inst->DstReg.File == RC_FILE_TEMPORARY) - used[inst->DstReg.Index] = 1; - } +/** + * This function fills in the parameter 'used' with a writemask that + * represent which components of each temporary register are used by the + * program. This is meant to be combined with rc_find_free_temporary_list as a + * more efficient version of rc_find_free_temporary. + * @param used The function does not initialize this parameter. + */ +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length) +{ + struct rc_instruction * inst; + struct get_used_temporaries_data d; + d.Used = used; + d.UsedLength = used_length; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d); + rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d); } +} - for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (!used[i]) +/* Search a list of used temporaries for a free one + * \sa rc_get_used_temporaries + * @note If this functions finds a free temporary, it will mark it as used + * in the used temporary list (param 'used') + * @param used list of used temporaries + * @param used_length number of items in param 'used' + * @param mask which components must be free in the temporary index that is + * returned. + * @return -1 If there are no more free temporaries, otherwise the index of + * a temporary register where the components specified in param 'mask' are + * not being used. + */ +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask) +{ + int i; + for(i = 0; i < used_length; i++) { + if ((~used[i] & mask) == mask) { + used[i] |= mask; return i; + } } + return -1; +} - rc_error(c, "Ran out of temporary registers\n"); - return 0; +unsigned int rc_find_free_temporary(struct radeon_compiler * c) +{ + unsigned char used[RC_REGISTER_MAX_INDEX]; + int free; + + memset(used, 0, sizeof(used)); + + rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX); + + free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX, + RC_MASK_XYZW); + if (free < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return 0; + } + return free; } @@ -182,3 +244,14 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c) return ip; } + +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch(mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + default: return RC_SWIZZLE_UNUSED; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index f0a77d7b53..be078b4f4f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -191,6 +191,20 @@ static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) return ret; } +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +static inline rc_swizzle rc_mask_to_swz(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + default: assert(0); + } + return RC_SWIZZLE_UNUSED; +} struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); static inline void reset_srcreg(struct rc_src_register* reg) @@ -222,6 +236,17 @@ void rc_local_transform( struct radeon_compiler *c, void *user); +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length); + +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask); + unsigned int rc_find_free_temporary(struct radeon_compiler * c); struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); @@ -233,4 +258,5 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c); void rc_print_program(const struct rc_program *prog); +rc_swizzle rc_mask_to_swizzle(unsigned int mask); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 9dcd44c522..45f79ece5b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -181,4 +181,9 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ return 0; } } + +#define RC_SOURCE_NONE 0x0 +#define RC_SOURCE_RGB 0x1 +#define RC_SOURCE_ALPHA 0x2 + #endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index a21fe8d3df..5905d26e52 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -27,6 +27,9 @@ #include "radeon_program_pair.h" +#include "radeon_compiler_util.h" + +#include <stdlib.h> /** * Return the source slot where we installed the given register access, @@ -204,24 +207,37 @@ void rc_pair_foreach_source_that_rgb_reads( } } -/*return 0 for rgb, 1 for alpha -1 for error. */ - -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle) +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg) { - unsigned int chan; - unsigned int swz = RC_SWIZZLE_UNUSED; - unsigned int ret = RC_PAIR_SOURCE_NONE; - - for(chan = 0; chan < 3; chan++) { - swz = GET_SWZ(swizzle, chan); - if (swz == RC_SWIZZLE_W) { - ret |= RC_PAIR_SOURCE_ALPHA; - } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y - || swz == RC_SWIZZLE_Z) { - ret |= RC_PAIR_SOURCE_RGB; + unsigned int i, type; + unsigned int channels = 0; + + for(i = 0; i < 3; i++) { + if (arg == pair_inst->RGB.Arg + i) { + channels = 3; + break; } } - return ret; + + if (channels == 0) { + for (i = 0; i < 3; i++) { + if (arg == pair_inst->Alpha.Arg + i) { + channels = 1; + break; + } + } + } + + assert(channels > 0); + type = rc_source_type_swz(arg->Swizzle, channels); + + if (type & RC_SOURCE_RGB) { + return &pair_inst->RGB.Src[arg->Source]; + } else if (type & RC_SOURCE_ALPHA) { + return &pair_inst->Alpha.Src[arg->Source]; + } else { + return NULL; + } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 54d44a2098..ccf7a0070c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -55,10 +55,6 @@ struct radeon_compiler; */ #define RC_PAIR_PRESUB_SRC 3 -#define RC_PAIR_SOURCE_NONE 0x0 -#define RC_PAIR_SOURCE_RGB 0x1 -#define RC_PAIR_SOURCE_ALPHA 0x2 - struct rc_pair_instruction_source { unsigned int Used:1; unsigned int File:3; @@ -115,9 +111,9 @@ void rc_pair_foreach_source_that_rgb_reads( void * data, rc_pair_foreach_src_fn cb); -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle); +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg); /*@}*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index 618ab5a099..ae13f6742f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -129,6 +129,7 @@ static char rc_swizzle_char(unsigned int swz) case RC_SWIZZLE_HALF: return 'H'; case RC_SWIZZLE_UNUSED: return '_'; } + fprintf(stderr, "bad swz: %u\n", swz); return '?'; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 60e228be5b..88165f7895 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -33,100 +33,51 @@ #include "radeon_compiler.h" #include "radeon_dataflow.h" - -struct reg_rename { - int old_index; - int new_index; - int temp_index; -}; - -static void rename_reg(void * data, struct rc_instruction * inst, - rc_register_file * file, unsigned int * index) -{ - struct reg_rename *r = data; - - if(r->old_index == *index && *file == RC_FILE_TEMPORARY) { - *index = r->new_index; - } - else if(r->new_index == *index && *file == RC_FILE_TEMPORARY) { - *index = r->temp_index; - } -} - -static void rename_all( - struct radeon_compiler *c, - struct rc_instruction * start, - unsigned int old, - unsigned int new, - unsigned int temp) -{ - struct rc_instruction * inst; - struct reg_rename r; - r.old_index = old; - r.new_index = new; - r.temp_index = temp; - for(inst = start; inst != &c->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, rename_reg, &r); - } -} +#include "radeon_program.h" /** * This function renames registers in an attempt to get the code close to * SSA form. After this function has completed, most of the register are only - * written to one time, with a few exceptions. For example, this block of code - * will not be modified by this function: - * Mov Temp[0].x Const[0].x - * Mov Temp[0].y Const[0].y - * Basically, destination registers will be renamed if: - * 1. There have been no previous writes to that register - * or - * 2. If the instruction is writting to the exact components (no more, no less) - * of a register that has been written to by previous instructions. + * written to one time, with a few exceptions. * * This function assumes all the instructions are still of type * RC_INSTRUCTION_NORMAL. */ void rc_rename_regs(struct radeon_compiler *c, void *user) { - unsigned int cur_index = 0; - unsigned int icount; + unsigned int i, used_length; + int new_index; struct rc_instruction * inst; - unsigned int * masks; + struct rc_reader_data reader_data; + unsigned char * used; - /* The number of instructions in the program is also the maximum - * number of temp registers that could potentially be used. */ - icount = rc_recompute_ips(c); - masks = memory_pool_malloc(&c->Pool, icount * sizeof(unsigned int)); - memset(masks, 0, icount * sizeof(unsigned int)); + used_length = 2 * rc_recompute_ips(c); + used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); + memset(used, 0, sizeof(unsigned char) * used_length); + rc_get_used_temporaries(c, used, used_length); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * info; - unsigned int old_index, temp_index; - struct rc_dst_register * dst; - if(inst->Type != RC_INSTRUCTION_NORMAL) { - rc_error(c, "%s only works with normal instructions.", - __FUNCTION__); - return; - } - dst = &inst->U.I.DstReg; - info = rc_get_opcode_info(inst->U.I.Opcode); - if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) { + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) continue; + + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + continue; + + new_index = rc_find_free_temporary_list(c, used, used_length, + RC_MASK_XYZW); + if (new_index < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return; } - if(dst->Index >= icount || !masks[dst->Index] || - masks[dst->Index] == dst->WriteMask) { - old_index = dst->Index; - /* We need to set dst->Index here so get free temporary - * will work. */ - dst->Index = cur_index++; - temp_index = rc_find_free_temporary(c); - rename_all(c, inst->Next, old_index, - dst->Index, temp_index); + + reader_data.Writer->U.I.DstReg.Index = new_index; + for(i = 0; i < reader_data.ReaderCount; i++) { + reader_data.Readers[i].U.Src->Index = new_index; } - assert(dst->Index < icount); - masks[dst->Index] |= dst->WriteMask; } } diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 9fbd36bfe6..c288834d24 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -86,6 +86,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil #define need_GL_NV_vertex_program +#define need_GL_OES_EGL_image #include "main/remap_helper.h" @@ -134,6 +135,9 @@ static const struct dri_extension card_extensions[] = { {"GL_MESAX_texture_float", NULL}, {"GL_NV_blend_square", NULL}, {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, +#if FEATURE_OES_EGL_image + {"GL_OES_EGL_image", GL_OES_EGL_image_functions }, +#endif {NULL, NULL} /* *INDENT-ON* */ }; diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 81769e1ee5..0c4d8537c6 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -717,6 +717,10 @@ static void r300DrawPrims(struct gl_context *ctx, GLuint max_index) { GLboolean retval; + struct r300_context *r300 = R300_CONTEXT(ctx); + radeonContextPtr radeon = &r300->radeon; + + radeon_prepare_render(radeon); /* This check should get folded into just the places that * min/max index are really needed. diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 821318e7a5..44090ec289 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -327,8 +327,6 @@ void r300RunRenderPrimitive(struct gl_context * ctx, int start, int end, int pri BATCH_LOCALS(&rmesa->radeon); int type, num_verts; - radeon_prepare_render(&rmesa->radeon); - type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index a6bda0e499..de66293999 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -382,5 +382,9 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->GenerateMipmap = radeonGenerateMipmap; +#if FEATURE_OES_EGL_image + functions->EGLImageTargetTexture2D = radeon_image_target_texture_2d; +#endif + driInitTextureFormats(); } diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 0116c5d2fa..ed9955b05d 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -428,6 +428,7 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format radeonTexObjPtr t; uint32_t pitch_val; uint32_t internalFormat, type, format; + gl_format texFormat; type = GL_BGRA; format = GL_UNSIGNED_BYTE; @@ -467,9 +468,6 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format radeon_miptree_unreference(&t->mt); radeon_miptree_unreference(&rImage->mt); - _mesa_init_teximage_fields(radeon->glCtx, target, texImage, - rb->base.Width, rb->base.Height, 1, 0, rb->cpp); - texImage->RowStride = rb->pitch / rb->cpp; rImage->bo = rb->bo; radeon_bo_ref(rImage->bo); t->bo = rb->bo; @@ -481,22 +479,35 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format pitch_val = rb->pitch; switch (rb->cpp) { case 4: - if (texture_format == __DRI_TEXTURE_FORMAT_RGB) + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) { + texFormat = MESA_FORMAT_RGB888; t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); - else + } + else { + texFormat = MESA_FORMAT_ARGB8888; t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + } pitch_val /= 4; break; case 3: default: + texFormat = MESA_FORMAT_RGB888; t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); pitch_val /= 4; break; case 2: + texFormat = MESA_FORMAT_RGB565; t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); pitch_val /= 2; break; } + + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, + rb->base.Width, rb->base.Height, 1, 0, + rb->cpp, texFormat); + texImage->RowStride = rb->pitch / rb->cpp; + + pitch_val--; t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT))) | ((R300_TX_HEIGHTMASK_MASK & ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT)))); |