diff options
Diffstat (limited to 'src')
8 files changed, 308 insertions, 47 deletions
| diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index e432afc3d4..34d22b4559 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \  		radeon_dataflow.c \  		radeon_dataflow_deadcode.c \  		radeon_dataflow_swizzles.c \ +		radeon_optimize.c \  		r3xx_fragprog.c \  		r300_fragprog.c \  		r300_fragprog_swizzle.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 25bf373b6f..3e88ccbc46 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -152,6 +152,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)  	debug_program_log(c, "after deadcode"); +	rc_optimize(&c->Base); + +	debug_program_log(c, "after dataflow optimize"); +  	rc_dataflow_swizzles(&c->Base);  	if (c->Base.Error)  		return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 16e2f3a218..0e6c62541f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -30,7 +30,7 @@  #include "radeon_program.h" -static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)  {  	struct rc_sub_instruction * inst = &fullinst->U.I;  	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); @@ -46,18 +46,15 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb,  		refmask &= RC_MASK_XYZW; -		for(unsigned int chan = 0; chan < 4; ++chan) { -			if (GET_BIT(refmask, chan)) { -				cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan); -			} -		} +		if (refmask) +			cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);  		if (refmask && inst->SrcReg[src].RelAddr)  			cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);  	}  } -static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_fn cb, void * userdata) +static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_mask_fn cb, void * userdata)  {  	struct rc_pair_instruction * inst = &fullinst->U.P;  	unsigned int refmasks[3] = { 0, 0, 0 }; @@ -84,27 +81,23 @@ static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_fn cb, v  	}  	for(unsigned int src = 0; src < 3; ++src) { -		if (inst->RGB.Src[src].Used) { -			for(unsigned int chan = 0; chan < 3; ++chan) { -				if (GET_BIT(refmasks[src], chan)) -					cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan); -			} -		} +		if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) +			cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, +			   refmasks[src] & RC_MASK_XYZ); -		if (inst->Alpha.Src[src].Used) { -			if (GET_BIT(refmasks[src], 3)) -				cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3); -		} +		if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) +			cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);  	}  }  /** - * Calls a callback function for all sourced register channels. + * Calls a callback function for all register reads.   * - * This is conservative, i.e. channels may be called multiple times, - * and the writemask of the instruction is not taken into account. + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account.   */ -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)  {  	if (inst->Type == RC_INSTRUCTION_NORMAL) {  		reads_normal(inst, cb, userdata); @@ -115,44 +108,39 @@ void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * -static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)  {  	struct rc_sub_instruction * inst = &fullinst->U.I;  	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); -	if (opcode->HasDstReg) { -		for(unsigned int chan = 0; chan < 4; ++chan) { -			if (GET_BIT(inst->DstReg.WriteMask, chan)) -				cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan); -		} -	} +	if (opcode->HasDstReg && inst->DstReg.WriteMask) +		cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);  	if (inst->WriteALUResult) -		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); +		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);  } -static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)  {  	struct rc_pair_instruction * inst = &fullinst->U.P; -	for(unsigned int chan = 0; chan < 3; ++chan) { -		if (GET_BIT(inst->RGB.WriteMask, chan)) -			cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan); -	} +	if (inst->RGB.WriteMask) +		cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);  	if (inst->Alpha.WriteMask) -		cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3); +		cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);  	if (inst->WriteALUResult) -		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); +		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);  }  /** - * Calls a callback function for all written register channels. + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function.   *   * \warning Does not report output registers for paired instructions!   */ -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)  {  	if (inst->Type == RC_INSTRUCTION_NORMAL) {  		writes_normal(inst, cb, userdata); @@ -162,6 +150,48 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void *  } +struct mask_to_chan_data { +	void * UserData; +	rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, +		rc_register_file file, unsigned int index, unsigned int mask) +{ +	struct mask_to_chan_data * d = data; +	for(unsigned int chan = 0; chan < 4; ++chan) { +		if (GET_BIT(mask, chan)) +			d->Fn(d->UserData, inst, file, index, chan); +	} +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ +	struct mask_to_chan_data d; +	d.UserData = userdata; +	d.Fn = cb; +	rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ +	struct mask_to_chan_data d; +	d.UserData = userdata; +	d.Fn = cb; +	rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} +  static void remap_normal_instruction(struct rc_instruction * fullinst,  		rc_remap_register_fn cb, void * userdata)  { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 62cda20eea..60a6e192a9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -39,10 +39,15 @@ struct rc_swizzle_caps;   * Help analyze and modify the register accesses of instructions.   */  /*@{*/ -typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,  			rc_register_file file, unsigned int index, unsigned int chan); -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, +			rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);  typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,  			rc_register_file * pfile, unsigned int * pindex); @@ -60,4 +65,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f  void rc_dataflow_swizzles(struct radeon_compiler * c);  /*@}*/ +void rc_optimize(struct radeon_compiler * c); +  #endif /* RADEON_DATAFLOW_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c index d889612f4f..863654cf68 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c @@ -150,7 +150,7 @@ static void allocate_and_insert_proxies(struct emulate_branch_state * s,  	sap.Proxies = proxies;  	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { -		rc_for_all_writes(inst, scan_write, &sap); +		rc_for_all_writes_mask(inst, scan_write, &sap);  		rc_remap_registers(inst, remap_proxy_function, &sap);  	} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c new file mode 100644 index 0000000000..80e3eeacfc --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ +	struct rc_src_register combine; +	combine.File = inner.File; +	combine.Index = inner.Index; +	combine.RelAddr = inner.RelAddr; +	if (outer.Abs) { +		combine.Abs = 1; +		combine.Negate = outer.Negate; +	} else { +		combine.Abs = inner.Abs; +		combine.Negate = 0; +		for(unsigned int chan = 0; chan < 4; ++chan) { +			unsigned int swz = GET_SWZ(outer.Swizzle, chan); +			if (swz < 4) +				combine.Negate |= GET_BIT(inner.Negate, swz) << chan; +		} +		combine.Negate ^= outer.Negate; +	} +	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); +	return combine; +} + +struct peephole_state { +	struct radeon_compiler * C; +	struct rc_instruction * Mov; +	unsigned int Conflict:1; + +	/** Whether Mov's source has been clobbered */ +	unsigned int SourceClobbered:1; + +	/** Which components of Mov's destination register are still from that Mov? */ +	unsigned int MovMask:4; + +	/** Which components of Mov's destination register are clearly *not* from that Mov */ +	unsigned int DefinedMask:4; + +	/** Which components of Mov's source register are sourced */ +	unsigned int SourcedMask:4; + +	/** Branch depth beyond Mov; negative value indicates we left the Mov's block */ +	int BranchDepth; +}; + +static void peephole_scan_read(void * data, struct rc_instruction * inst, +		rc_register_file file, unsigned int index, unsigned int mask) +{ +	struct peephole_state * s = data; + +	if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index) +		return; + +	if ((mask & s->MovMask) == mask) { +		if (s->SourceClobbered) { +			s->Conflict = 1; +		} +	} else if ((mask & s->DefinedMask) == mask) { +		/* read from something entirely written by other instruction: this is okay */ +	} else { +		/* read from component combination that is not well-defined without +		 * the MOV: cannot remove it */ +		s->Conflict = 1; +	} +} + +static void peephole_scan_write(void * data, struct rc_instruction * inst, +		rc_register_file file, unsigned int index, unsigned int mask) +{ +	struct peephole_state * s = data; + +	if (s->BranchDepth < 0) +		return; + +	if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) { +		s->MovMask &= ~mask; +		if (s->BranchDepth == 0) +			s->DefinedMask |= mask; +		else +			s->DefinedMask &= ~mask; +	} else if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) { +		if (mask & s->SourcedMask) +			s->SourceClobbered = 1; +	} else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { +		s->SourceClobbered = 1; +	} +} + +static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ +	struct peephole_state s; + +	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult) +		return; + +	memset(&s, 0, sizeof(s)); +	s.C = c; +	s.Mov = inst_mov; +	s.MovMask = inst_mov->U.I.DstReg.WriteMask; +	s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; + +	for(unsigned int chan = 0; chan < 4; ++chan) { +		unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); +		s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; +	} + +	/* 1st pass: Check whether all subsequent readers can be changed */ +	for(struct rc_instruction * inst = inst_mov->Next; +	    inst != &c->Program.Instructions; +	    inst = inst->Next) { +		rc_for_all_reads_mask(inst, peephole_scan_read, &s); +		rc_for_all_writes_mask(inst, peephole_scan_write, &s); +		if (s.Conflict) +			return; + +		if (s.BranchDepth >= 0) { +			if (inst->U.I.Opcode == RC_OPCODE_IF) { +				s.BranchDepth++; +			} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { +				s.BranchDepth--; +				if (s.BranchDepth < 0) { +					s.DefinedMask &= ~s.MovMask; +					s.MovMask = 0; +				} +			} +		} +	} + +	if (s.Conflict) +		return; + +	/* 2nd pass: We can satisfy all readers, so switch them over all at once */ +	s.MovMask = inst_mov->U.I.DstReg.WriteMask; +	s.BranchDepth = 0; + +	for(struct rc_instruction * inst = inst_mov->Next; +	    inst != &c->Program.Instructions; +	    inst = inst->Next) { +		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + +		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { +			if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY && +			    inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) { +				unsigned int refmask = 0; + +				for(unsigned int chan = 0; chan < 4; ++chan) { +					unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); +					refmask |= (1 << swz) & RC_MASK_XYZW; +				} + +				if ((refmask & s.MovMask) == refmask) +					inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]); +			} +		} + +		if (opcode->HasDstReg) { +			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY && +			    inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) { +				s.MovMask &= ~inst->U.I.DstReg.WriteMask; +			} +		} + +		if (s.BranchDepth >= 0) { +			if (inst->U.I.Opcode == RC_OPCODE_IF) { +				s.BranchDepth++; +			} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { +				s.BranchDepth--; +				if (s.BranchDepth < 0) +					break; /* no more readers after this point */ +			} +		} +	} + +	/* Finally, remove the original MOV instruction */ +	rc_remove_instruction(inst_mov); +} + +void rc_optimize(struct radeon_compiler * c) +{ +	struct rc_instruction * inst = c->Program.Instructions.Next; +	while(inst != &c->Program.Instructions) { +		struct rc_instruction * cur = inst; +		inst = inst->Next; + +		if (cur->U.I.Opcode == RC_OPCODE_MOV) +			peephole(c, cur); +	} +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index fdfee86701..8a912da461 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -159,7 +159,7 @@ static int try_add_live_intervals(struct regalloc_state * s,  }  static void scan_callback(void * data, struct rc_instruction * inst, -		rc_register_file file, unsigned int index, unsigned int chan) +		rc_register_file file, unsigned int index, unsigned int mask)  {  	struct regalloc_state * s = data;  	struct register_info * reg; @@ -191,8 +191,8 @@ static void compute_live_intervals(struct regalloc_state * s)  	for(struct rc_instruction * inst = s->C->Program.Instructions.Next;  	    inst != &s->C->Program.Instructions;  	    inst = inst->Next) { -		rc_for_all_reads(inst, scan_callback, s); -		rc_for_all_writes(inst, scan_callback, s); +		rc_for_all_reads_mask(inst, scan_callback, s); +		rc_for_all_writes_mask(inst, scan_callback, s);  	}  } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index df67aafe02..a279549ff8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -448,8 +448,8 @@ static void schedule_block(struct r300_fragment_program_compiler * c,  		 * counter-intuitive, to account for the case where an  		 * instruction writes to the same register as it reads  		 * from. */ -		rc_for_all_writes(inst, &scan_write, &s); -		rc_for_all_reads(inst, &scan_read, &s); +		rc_for_all_writes_chan(inst, &scan_write, &s); +		rc_for_all_reads_chan(inst, &scan_read, &s);  		DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); | 
