diff options
Diffstat (limited to 'src/mesa')
| -rw-r--r-- | src/mesa/drivers/dri/r300/compiler/radeon_optimize.c | 226 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c | 58 | 
2 files changed, 200 insertions, 84 deletions
| diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index e01ba85aa5..3ff07d6039 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -38,6 +38,10 @@ struct peephole_state {  	unsigned int WriteMask;  }; +typedef void (*rc_presub_replace_fn)(struct peephole_state *, +						struct rc_instruction *, +						unsigned int); +  static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)  {  	struct rc_src_register combine; @@ -516,68 +520,26 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,  	}  } -/** - * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] - * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source - * of the add instruction must have the constatnt 1 swizzle.  This function - * does not check const registers to see if their value is 1.0, so it should - * be called after the constant_folding optimization. - * @return  - * 	0 if the ADD instruction is still part of the program. - * 	1 if the ADD instruction is no longer part of the program. - */ -static int peephole_add_presub_inv( +static int presub_helper(  	struct radeon_compiler * c, -	struct rc_instruction * inst_add) +	struct peephole_state * s, +	rc_presubtract_op presub_opcode, +	rc_presub_replace_fn presub_replace)  { -	unsigned int i, swz, mask; +	struct rc_instruction * inst;  	unsigned int can_remove = 0;  	unsigned int cant_sub = 0; -	struct rc_instruction * inst; -	struct peephole_state s; - -	if (inst_add->U.I.SaturateMode) -		return 0; - -	mask = inst_add->U.I.DstReg.WriteMask; - -	/* Check if src0 is 1. */ -	/* XXX It would be nice to use is_src_uniform_constant here, but that -	 * function only works if the register's file is RC_FILE_NONE */ -	for(i = 0; i < 4; i++ ) { -		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); -		if(((1 << i) & inst_add->U.I.DstReg.WriteMask) -						&& swz != RC_SWIZZLE_ONE) { -			return 0; -		} -	} -	/* Check src1. */ -	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != -						inst_add->U.I.DstReg.WriteMask -		|| inst_add->U.I.SrcReg[1].Abs -		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY -			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) -		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) { - -		return 0; -	} - -	/* Setup the peephole_state information. */ -	s.Inst = inst_add; -	s.WriteMask = inst_add->U.I.DstReg.WriteMask; - -	/* For all instructions that read inst_add->U.I.DstReg before it is -	 * written again, use the 1 - src0 presubtact instead. */ -	for(inst = inst_add->Next; inst != &c->Program.Instructions; +	for(inst = s->Inst->Next; inst != &c->Program.Instructions;  							inst = inst->Next) { +		unsigned int i;  		const struct rc_opcode_info * info =  					rc_get_opcode_info(inst->U.I.Opcode);  		for(i = 0; i < info->NumSrcRegs; i++) { -			if(inst_add->U.I.DstReg.WriteMask != +			if(s->Inst->U.I.DstReg.WriteMask !=  					src_reads_dst_mask(inst->U.I.SrcReg[i], -						inst_add->U.I.DstReg)) { +						s->Inst->U.I.DstReg)) {  				continue;  			}  			if (cant_sub) { @@ -601,47 +563,173 @@ static int peephole_add_presub_inv(  			 * instruction, unless the two prsubtract operations  			 * are the same and read from the same registers. */  			if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { -				if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV +				if (inst->U.I.PreSub.Opcode != presub_opcode  					|| inst->U.I.PreSub.SrcReg[0].File != -						inst_add->U.I.SrcReg[1].File +						s->Inst->U.I.SrcReg[1].File  					|| inst->U.I.PreSub.SrcReg[0].Index != -						inst_add->U.I.SrcReg[1].Index) { +						s->Inst->U.I.SrcReg[1].Index) {  					can_remove = 0;  					break;  				}  			} -			/* We must be careful not to modify inst_add, since it -			 * is possible it will remain part of the program. */ -			inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; -			inst->U.I.PreSub.SrcReg[0].Negate = 0; -			inst->U.I.PreSub.Opcode = RC_PRESUB_INV; -			inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i], -						inst->U.I.PreSub.SrcReg[0]); - -			inst->U.I.SrcReg[i].File = RC_FILE_PRESUB; -			inst->U.I.SrcReg[i].Index = RC_PRESUB_INV; +			presub_replace(s, inst, i);  			can_remove = 1;  		}  		if(!can_remove)  			break; -		rc_for_all_writes_mask(inst, peephole_scan_write, &s); +		rc_for_all_writes_mask(inst, peephole_scan_write, s);  		/* If all components of inst_add's destination register have  		 * been written to by subsequent instructions, the original  		 * value of the destination register is no longer valid and  		 * we can't keep doing substitutions. */ -		if (!s.WriteMask){ +		if (!s->WriteMask){  			break;  		}  		/* Make this instruction doesn't write to the presubtract source. */  		if (inst->U.I.DstReg.WriteMask & -				src_reads_dst_mask(inst_add->U.I.SrcReg[1], +				src_reads_dst_mask(s->Inst->U.I.SrcReg[1],  							inst->U.I.DstReg)  				|| info->IsFlowControl) {  			cant_sub = 1;  		}  	} -	if(can_remove) { +	return can_remove; +} + +static void presub_replace_add(struct peephole_state *s, +						struct rc_instruction * inst, +						unsigned int src_index) +{ +	inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0]; +	inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1]; +	inst->U.I.PreSub.SrcReg[0].Negate = 0; +	inst->U.I.PreSub.SrcReg[1].Negate = 0; +	inst->U.I.PreSub.Opcode = RC_PRESUB_ADD; +	inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], +						inst->U.I.PreSub.SrcReg[0]); +	inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; +	inst->U.I.SrcReg[src_index].Index = RC_PRESUB_ADD; +} + +static int peephole_add_presub_add( +	struct radeon_compiler * c, +	struct rc_instruction * inst_add) +{ +	struct rc_src_register * src0 = NULL; +	struct rc_src_register * src1 = NULL; +	unsigned int i; +	struct peephole_state s; + +	if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) +		return 0; + +	if (inst_add->U.I.SaturateMode) +		return 0; + +	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) +		return 0; + +	/* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */ +	for (i = 0; i < 2; i++) { +		if (inst_add->U.I.SrcReg[i].Abs) +			return 0; +		if ((inst_add->U.I.SrcReg[i].Negate +					& inst_add->U.I.DstReg.WriteMask) == +						inst_add->U.I.DstReg.WriteMask) { +			src0 = &inst_add->U.I.SrcReg[i]; +		} else if (!src1) { +			src1 = &inst_add->U.I.SrcReg[i]; +		} else { +			src0 = &inst_add->U.I.SrcReg[i]; +		} +	} + +	if (!src1) +		return 0; + +	/* XXX Only do add for now. */ +	if (src0->Negate) +		return 0; + +	s.Inst = inst_add; +	s.WriteMask = inst_add->U.I.DstReg.WriteMask; +	if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) { +		rc_remove_instruction(inst_add); +		return 1; +	} +	return 0; +} + +static void presub_replace_inv(struct peephole_state * s, +						struct rc_instruction * inst, +						unsigned int src_index) +{ +	/* We must be careful not to modify s->Inst, since it +	 * is possible it will remain part of the program.  +	 * XXX Maybe pass a struct instead of a pointer for s->Inst.*/ +	inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; +	inst->U.I.PreSub.SrcReg[0].Negate = 0; +	inst->U.I.PreSub.Opcode = RC_PRESUB_INV; +	inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], +						inst->U.I.PreSub.SrcReg[0]); + +	inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; +	inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; +} + +/** + * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] + * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source + * of the add instruction must have the constatnt 1 swizzle.  This function + * does not check const registers to see if their value is 1.0, so it should + * be called after the constant_folding optimization. + * @return  + * 	0 if the ADD instruction is still part of the program. + * 	1 if the ADD instruction is no longer part of the program. + */ +static int peephole_add_presub_inv( +	struct radeon_compiler * c, +	struct rc_instruction * inst_add) +{ +	unsigned int i, swz, mask; +	struct peephole_state s; + +	if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) +		return 0; + +	if (inst_add->U.I.SaturateMode) +		return 0; + +	mask = inst_add->U.I.DstReg.WriteMask; + +	/* Check if src0 is 1. */ +	/* XXX It would be nice to use is_src_uniform_constant here, but that +	 * function only works if the register's file is RC_FILE_NONE */ +	for(i = 0; i < 4; i++ ) { +		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); +		if(((1 << i) & inst_add->U.I.DstReg.WriteMask) +						&& swz != RC_SWIZZLE_ONE) { +			return 0; +		} +	} + +	/* Check src1. */ +	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != +						inst_add->U.I.DstReg.WriteMask +		|| inst_add->U.I.SrcReg[1].Abs +		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY +			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) +		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) { + +		return 0; +	} + +	/* Setup the peephole_state information. */ +	s.Inst = inst_add; +	s.WriteMask = inst_add->U.I.DstReg.WriteMask; + +	if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {  		rc_remove_instruction(inst_add);  		return 1;  	} @@ -660,6 +748,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)  		if (c->has_presub) {  			if(peephole_add_presub_inv(c, inst))  				return 1; +			if(peephole_add_presub_add(c, inst)) +				return 1;  		}  		break;  	default: diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 32c54fd74b..5269d65985 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -300,6 +300,7 @@ static int destructive_merge_instructions(  		for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {  			unsigned int arg;  			int free_source; +			unsigned int one_way = 0;  			struct radeon_pair_instruction_source srcp =  						alpha->RGB.Src[srcp_src];  			struct radeon_pair_instruction_source temp; @@ -307,14 +308,27 @@ static int destructive_merge_instructions(  			 * 3rd arg of 0 means this is not an alpha source. */  			free_source = rc_pair_alloc_source(rgb, 1, 0,  							srcp.File, srcp.Index); -			/* If free_source == srcp_src, then either the -			 * presubtract source is already in the correct place. */ -			if (free_source == srcp_src) -				continue;  			/* If free_source < 0 then there are no free source  			 * slots. */  			if (free_source < 0)  				return 0; + +			temp = rgb->RGB.Src[srcp_src]; +			rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source]; +			/* srcp needs src0 and src1 to be the same */ +			if (free_source < srcp_src) { +				if (!temp.Used) +					continue; +				free_source = rc_pair_alloc_source(rgb, 1, 0, +							srcp.File, srcp.Index); +				one_way = 1; +			} else { +				rgb->RGB.Src[free_source] = temp; +			} +			/* If free_source == srcp_src, then the presubtract +			 * source is already in the correct place. */ +			if (free_source == srcp_src) +				continue;  			/* Shuffle the sources, so we can put the  			 * presubtract source in the correct place. */  			for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) { @@ -331,12 +345,11 @@ static int destructive_merge_instructions(  				/* We need to do this just in case register  				 * is one of the sources already, but in the  				 * wrong spot. */ -				else if(rgb->RGB.Arg[arg].Source == free_source) +				else if(rgb->RGB.Arg[arg].Source == free_source +								&& !one_way) {  					rgb->RGB.Arg[arg].Source = srcp_src; +				}  			} -			temp = rgb->RGB.Src[srcp_src]; -			rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source]; -			rgb->RGB.Src[free_source] = temp;  		}  	} @@ -352,6 +365,7 @@ static int destructive_merge_instructions(  		for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {  			unsigned int arg;  			int free_source; +			unsigned int one_way = 0;  			struct radeon_pair_instruction_source srcp =  						alpha->Alpha.Src[srcp_src];  			struct radeon_pair_instruction_source temp; @@ -359,14 +373,27 @@ static int destructive_merge_instructions(  			 * 3rd arg of 1 means this is an alpha source. */  			free_source = rc_pair_alloc_source(rgb, 0, 1,  							srcp.File, srcp.Index); -			/* If free_source == srcp_src, then either the -			 * presubtract source is already in the correct place. */ -			if (free_source == srcp_src) -				continue;  			/* If free_source < 0 then there are no free source  			 * slots. */  			if (free_source < 0)  				return 0; + +			temp = rgb->Alpha.Src[srcp_src]; +			rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source]; +			/* srcp needs src0 and src1 to be the same. */ +			if (free_source < srcp_src) { +				if (!temp.Used) +					continue; +				free_source = rc_pair_alloc_source(rgb, 0, 1, +							temp.File, temp.Index); +				one_way = 1; +			} else { +				rgb->Alpha.Src[free_source] = temp; +			} +			/* If free_source == srcp_src, then the presubtract +			 * source is already in the correct place. */ +			if (free_source == srcp_src) +				continue;  			/* Shuffle the sources, so we can put the  			 * presubtract source in the correct place. */  			for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) { @@ -380,12 +407,11 @@ static int destructive_merge_instructions(  				}  				if (rgb->RGB.Arg[arg].Source == srcp_src)  					rgb->RGB.Arg[arg].Source = free_source; -				else if (rgb->RGB.Arg[arg].Source == free_source) +				else if (rgb->RGB.Arg[arg].Source == free_source +								&& !one_way) {  					rgb->RGB.Arg[arg].Source = srcp_src; +				}  			} -			temp = rgb->Alpha.Src[srcp_src]; -			rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source]; -			rgb->Alpha.Src[free_source] = temp;  		}  	} | 
