diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
25 files changed, 345 insertions, 275 deletions
| diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 782671bac0..deba9ca834 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -49,6 +49,11 @@ static void presub_string(char out[10], unsigned int inst)  	}  } +static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +{ +	return (r400_ext_addr & bit) ? 1 << 5 : 0; +} +  /* just some random things... */  void r300FragmentProgramDump(struct radeon_compiler *c, void *user)  { @@ -61,16 +66,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)  	fprintf(stderr, "Hardware program\n");  	fprintf(stderr, "----------------\n"); +	if (c->is_r400) { +		fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); +	}  	for (n = 0; n <= (code->config & 3); n++) {  		uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; -		int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; -		int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; +		unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + +				(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); +		unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + +				(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);  		int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;  		int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; -		fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " -			"alu_end: %d, tex_end: %d  (code_addr: %08x)\n", n, +		fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " +			"alu_end: %u, tex_end: %d  (code_addr: %08x)\n", n,  			alu_offset, tex_offset, alu_end, tex_end, code_addr);  		if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { @@ -125,11 +135,15 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)  			for (j = 0; j < 3; ++j) {  				int regc = code->alu.inst[i].rgb_addr >> (j * 6);  				int rega = code->alu.inst[i].alpha_addr >> (j * 6); +				int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), +					code->alu.inst[i].r400_ext_addr); +				int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), +					code->alu.inst[i].r400_ext_addr);  				sprintf(srcc[j], "%c%i", -					(regc & 32) ? 'c' : 't', regc & 31); +					(regc & 32) ? 'c' : 't', (regc & 31) | msbc);  				sprintf(srca[j], "%c%i", -					(rega & 32) ? 'c' : 't', rega & 31); +					(rega & 32) ? 'c' : 't', (rega & 31) | msba);  			}  			dstc[0] = 0; @@ -141,9 +155,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)  				(code->alu.inst[i].  				 rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");  			if (flags[0] != 0) { +				unsigned int msb = get_msb( +					R400_ADDRD_EXT_RGB_MSB_BIT, +					code->alu.inst[i].r400_ext_addr); +  				sprintf(dstc, "t%i.%s ", -					(code->alu.inst[i]. -					 rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, +					((code->alu.inst[i]. +					 rgb_addr >> R300_ALU_DSTC_SHIFT) +					 & 31) | msb,  					flags);  			}  			sprintf(flags, "%s%s%s", @@ -166,9 +185,13 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)  			dsta[0] = 0;  			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { +				unsigned int msb = get_msb( +					R400_ADDRD_EXT_A_MSB_BIT, +					code->alu.inst[i].r400_ext_addr);  				sprintf(dsta, "t%i.w ", -					(code->alu.inst[i]. -					 alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); +					((code->alu.inst[i]. +					 alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) +					 | msb);  			}  			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {  				sprintf(tmp, "o%i.w ", diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 1db8678e89..28d132a5fe 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -64,6 +64,20 @@ struct r300_emit_state {  			__FILE__, __FUNCTION__, ##args);	\  	} while(0) +static unsigned int get_msbs_alu(unsigned int bits) +{ +	return (bits >> 6) & 0x7; +} + +/** + * @param lsbs The number of least significant bits + */ +static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +{ +	return (bits >> lsbs) & 0x15; +} + +#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask)  /**   * Mark a temporary register as used. @@ -83,7 +97,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r  		return src.Index | (1 << 5);  	} else if (src.File == RC_FILE_TEMPORARY) {  		use_temporary(code, src.Index); -		return src.Index; +		return src.Index & 0x1f;  	}  	return 0; @@ -151,11 +165,19 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i  	code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);  	for(j = 0; j < 3; ++j) { +		/* Set the RGB address */  		unsigned int src = use_source(code, inst->RGB.Src[j]);  		unsigned int arg; +		if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) +			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); +  		code->alu.inst[ip].rgb_addr |= src << (6*j); +		/* Set the Alpha address */  		src = use_source(code, inst->Alpha.Src[j]); +		if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) +			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); +  		code->alu.inst[ip].alpha_addr |= src << (6*j);  		arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); @@ -223,8 +245,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i  	if (inst->RGB.WriteMask) {  		use_temporary(code, inst->RGB.DestIndex); +		if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) +			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;  		code->alu.inst[ip].rgb_addr |= -			(inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | +			((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |  			(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);  	}  	if (inst->RGB.OutputWriteMask) { @@ -236,8 +260,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i  	if (inst->Alpha.WriteMask) {  		use_temporary(code, inst->Alpha.DestIndex); +		if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) +			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;  		code->alu.inst[ip].alpha_addr |= -			(inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | +			((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |  			R300_ALU_DSTA_REG;  	}  	if (inst->Alpha.OutputWriteMask) { @@ -269,6 +295,8 @@ static int finish_node(struct r300_emit_state * emit)  	unsigned tex_offset;  	unsigned tex_end; +	unsigned int alu_offset_msbs, alu_end_msbs; +  	if (code->alu.length == emit->node_first_alu) {  		/* Generate a single NOP for this node */  		struct rc_pair_instruction inst; @@ -301,13 +329,48 @@ static int finish_node(struct r300_emit_state * emit)  	 *  	 * Also note that the register specification from AMD is slightly  	 * incorrect in its description of this register. */ -	code->code_addr[emit->current_node] = -			(alu_offset << R300_ALU_START_SHIFT) | -			(alu_end << R300_ALU_SIZE_SHIFT) | -			(tex_offset << R300_TEX_START_SHIFT) | -			(tex_end << R300_TEX_SIZE_SHIFT) | -			emit->node_flags; +	code->code_addr[emit->current_node]  = +			((alu_offset << R300_ALU_START_SHIFT) +				& R300_ALU_START_MASK) +			| ((alu_end << R300_ALU_SIZE_SHIFT) +				& R300_ALU_SIZE_MASK) +			| ((tex_offset << R300_TEX_START_SHIFT) +				& R300_TEX_START_MASK) +			| ((tex_end << R300_TEX_SIZE_SHIFT) +				& R300_TEX_SIZE_MASK) +			| emit->node_flags +			| (get_msbs_tex(tex_offset, 5) +				<< R400_TEX_START_MSB_SHIFT) +			| (get_msbs_tex(tex_end, 5) +				<< R400_TEX_SIZE_MSB_SHIFT) +			; +	/* Write r400 extended instruction fields.  These will be ignored on +	 * r300 cards.  */ +	alu_offset_msbs = get_msbs_alu(alu_offset); +	alu_end_msbs = get_msbs_alu(alu_end); +	switch(emit->current_node) { +	case 0: +		code->r400_code_offset_ext |= +			alu_offset_msbs << R400_ALU_START3_MSB_SHIFT +			| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; +		break; +	case 1: +		code->r400_code_offset_ext |= +			alu_offset_msbs << R400_ALU_START2_MSB_SHIFT +			| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; +		break; +	case 2: +		code->r400_code_offset_ext |= +			alu_offset_msbs << R400_ALU_START1_MSB_SHIFT +			| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; +		break; +	case 3: +		code->r400_code_offset_ext |= +			alu_offset_msbs << R400_ALU_START0_MSB_SHIFT +			| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; +		break; +	}  	return 1;  } @@ -348,7 +411,7 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)  	unsigned int opcode;  	PROG_CODE; -	if (code->tex.length >= R300_PFS_MAX_TEX_INST) { +	if (code->tex.length >= emit->compiler->Base.max_tex_insts) {  		error("Too many TEX instructions");  		return 0;  	} @@ -376,10 +439,17 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)  	use_temporary(code, inst->U.I.SrcReg[0].Index);  	code->tex.inst[code->tex.length++] = -		(inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | -		(dest << R300_DST_ADDR_SHIFT) | -		(unit << R300_TEX_ID_SHIFT) | -		(opcode << R300_TEX_INST_SHIFT); +		((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) +			& R300_SRC_ADDR_MASK) +		| ((dest << R300_DST_ADDR_SHIFT) +			& R300_DST_ADDR_MASK) +		| (unit << R300_TEX_ID_SHIFT) +		| (opcode << R300_TEX_INST_SHIFT) +		| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? +			R400_SRC_ADDR_EXT_BIT : 0) +		| (dest >= R300_PFS_NUM_TEMP_REGS ? +			R400_DST_ADDR_EXT_BIT : 0) +		;  	return 1;  } @@ -393,6 +463,7 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)  	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;  	struct r300_emit_state emit;  	struct r300_fragment_program_code *code = &compiler->code->code.r300; +	unsigned int tex_end;  	memset(&emit, 0, sizeof(emit));  	emit.compiler = compiler; @@ -424,11 +495,28 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)  	finish_node(&emit);  	code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + +	/* Set r400 extended instruction fields.  These values will be ignored +	 * on r300 cards. */ +	code->r400_code_offset_ext |= +		(get_msbs_alu(0) +				<< R400_ALU_OFFSET_MSB_SHIFT) +		| (get_msbs_alu(code->alu.length - 1) +				<< R400_ALU_SIZE_MSB_SHIFT); + +	tex_end = code->tex.length ? code->tex.length - 1 : 0;  	code->code_offset = -		(0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | -		((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | -		(0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | -		((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); +		((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) +			& R300_PFS_CNTL_ALU_OFFSET_MASK) +		| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) +			& R300_PFS_CNTL_ALU_END_MASK) +		| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) +			& R300_PFS_CNTL_TEX_OFFSET_MASK) +		| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) +			& R300_PFS_CNTL_TEX_END_MASK) +		| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) +		| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) +		;  	if (emit.current_node < 3) {  		int shift = 3 - emit.current_node; @@ -438,4 +526,11 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)  		for(i = 0; i < shift; ++i)  			code->code_addr[i] = 0;  	} + +	if (code->pixsize >= R300_PFS_NUM_TEMP_REGS +	    || code->alu.length > R300_PFS_MAX_ALU_INST +	    || code->tex.length > R300_PFS_MAX_TEX_INST) { + +		code->r390_mode = 1; +	}  } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 05d3da8a10..5223aaa71a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -98,9 +98,6 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)  	unsigned int relevant;  	int j; -	if (reg.Abs) -		reg.Negate = RC_MASK_NONE; -  	if (opcode == RC_OPCODE_KIL ||  	    opcode == RC_OPCODE_TEX ||  	    opcode == RC_OPCODE_TXB || @@ -140,9 +137,6 @@ static void r300_swizzle_split(  		struct rc_src_register src, unsigned int mask,  		struct rc_swizzle_split * split)  { -	if (src.Abs) -		src.Negate = RC_MASK_NONE; -  	split->NumPhases = 0;  	while(mask) { @@ -222,13 +216,14 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)   */  unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)  { +	unsigned int swz = GET_SWZ(swizzle, 0);  	if (src == RC_PAIR_PRESUB_SRC) { -		return R300_ALU_ARGA_SRCP_X + swizzle; +		return R300_ALU_ARGA_SRCP_X + swz;  	} -	if (swizzle < 3) -		return swizzle + 3*src; +	if (swz < 3) +		return swz + 3*src; -	switch(swizzle) { +	switch(swz) {  	case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;  	case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;  	case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index e0d349b98c..9286733635 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -78,12 +78,32 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)  	}  } +static int radeon_saturate_output( +		struct radeon_compiler * c, +		struct rc_instruction * inst, +		void* data) +{ +	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + +	if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT) +		return 0; + +	inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; +	return 1; +} +  void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)  {  	int is_r500 = c->Base.is_r500;  	int opt = !c->Base.disable_optimizations; +	int sat_out = c->state.frag_clamp;  	/* Lists of instruction transformations. */ +	struct radeon_program_transformation saturate_output[] = { +		{ &radeon_saturate_output, c }, +		{ 0, 0 } +	}; +  	struct radeon_program_transformation rewrite_tex[] = {  		{ &radeonTransformTEX, c },  		{ 0, 0 } @@ -113,6 +133,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)  		{"unroll loops",		1, is_r500,	rc_unroll_loops,		NULL},  		{"transform loops",		1, !is_r500,	rc_transform_loops,		NULL},  		{"emulate branches",		1, !is_r500,	rc_emulate_branches,		NULL}, +		{"saturate output writes",	1, sat_out,	rc_local_transform,		saturate_output},  		{"transform TEX",		1, 1,		rc_local_transform,		rewrite_tex},  		{"native rewrite",		1, is_r500,	rc_local_transform,		native_rewrite_r500},  		{"native rewrite",		1, !is_r500,	rc_local_transform,		native_rewrite_r300}, @@ -124,7 +145,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)  		/* This pass makes it easier for the scheduler to group TEX  		 * instructions and reduces the chances of creating too  		 * many texture indirections.*/ -		{"register rename",		1, !is_r500 || opt, rc_rename_regs,		NULL}, +		{"register rename",		1, !is_r500,	rc_rename_regs,			NULL},  		{"pair translate",		1, 1,		rc_pair_translate,		NULL},  		{"pair scheduling",		1, 1,		rc_pair_schedule,		NULL},  		{"register allocation",		1, opt,		rc_pair_regalloc,		NULL}, diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 472029f63d..8ad2175ead 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -490,13 +490,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)  			continue;  		if (info->HasDstReg) { -			/* Relative addressing of destination operands is not supported yet. */ -			if (vpi->DstReg.RelAddr) { -				rc_error(&compiler->Base, "Vertex program does not support relative " -					 "addressing of destination operands (yet).\n"); -				return; -			} -  			/* Neither is Saturate. */  			if (vpi->SaturateMode != RC_SATURATE_NONE) {  				rc_error(&compiler->Base, "Vertex program does not support the Saturate " @@ -668,7 +661,6 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)  	char hwtemps[RC_REGISTER_MAX_INDEX];  	struct temporary_allocation * ta;  	unsigned int i, j; -	struct rc_instruction *last_inst_src_reladdr = NULL;  	memset(hwtemps, 0, sizeof(hwtemps)); @@ -693,28 +685,11 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)  		}  	} -	/* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up. -	 * For src temporaries, save the last instruction which uses relative addressing. */ -	for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { -		const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - -		if (opcode->HasDstReg) -			if (inst->U.I.DstReg.RelAddr) -				return; - -		for (i = 0; i < opcode->NumSrcRegs; ++i) { -			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && -			    inst->U.I.SrcReg[i].RelAddr) { -				last_inst_src_reladdr = inst; -			} -		} -	} -  	ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,  			sizeof(struct temporary_allocation) * num_orig_temps);  	memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); -	/* Pass 3: Determine original temporary lifetimes */ +	/* Pass 2: Determine original temporary lifetimes */  	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {  		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);  		/* Instructions inside of loops need to use the ENDLOOP @@ -744,41 +719,22 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)  		for (i = 0; i < opcode->NumSrcRegs; ++i) {  			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { -				struct rc_instruction *last_read; - -				/* From "last_inst_src_reladdr", "end_loop", and "inst", -				 * select the instruction with the highest instruction index (IP). -				 * Note that "end_loop", if available, has always a higher index than "inst". */ -				if (last_inst_src_reladdr) { -					if (end_loop) { -						last_read = last_inst_src_reladdr->IP > end_loop->IP ? -							    last_inst_src_reladdr : end_loop; -					} else { -						last_read = last_inst_src_reladdr->IP > inst->IP ? -							    last_inst_src_reladdr : inst; -					} -				} else { -					last_read = end_loop ? end_loop : inst; -				} - -				ta[inst->U.I.SrcReg[i].Index].LastRead = last_read; +				ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst;  			}  		}  	} -	/* Pass 4: Register allocation */ +	/* Pass 3: Register allocation */  	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {  		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); -		if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) { -			for (i = 0; i < opcode->NumSrcRegs; ++i) { -				if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { -					unsigned int orig = inst->U.I.SrcReg[i].Index; -					inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; +		for (i = 0; i < opcode->NumSrcRegs; ++i) { +			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { +				unsigned int orig = inst->U.I.SrcReg[i].Index; +				inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; -					if (ta[orig].Allocated && inst == ta[orig].LastRead) -						hwtemps[ta[orig].HwTemp] = 0; -				} +				if (ta[orig].Allocated && inst == ta[orig].LastRead) +					hwtemps[ta[orig].HwTemp] = 0;  			}  		} @@ -792,12 +748,7 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)  							break;  					}  					ta[orig].Allocated = 1; -					if (last_inst_src_reladdr && -					    last_inst_src_reladdr->IP > inst->IP) { -						ta[orig].HwTemp = orig; -					} else { -						ta[orig].HwTemp = j; -					} +					ta[orig].HwTemp = j;  					hwtemps[ta[orig].HwTemp] = 1;  				} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index ef81be48f7..140eeed3de 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -77,9 +77,6 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)  		if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))  			return 0; -		if (reg.Negate) -			reg.Negate ^= RC_MASK_XYZW; -  		for(i = 0; i < 4; ++i) {  			unsigned int swz = GET_SWZ(reg.Swizzle, i);  			if (swz == RC_SWIZZLE_UNUSED) { @@ -103,9 +100,6 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)  		return 0;  	} else {  		/* ALU instructions support almost everything */ -		if (reg.Abs) -			return 1; -  		relevant = 0;  		for(i = 0; i < 3; ++i) {  			unsigned int swz = GET_SWZ(reg.Swizzle, i); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 5da82d90f6..301b444669 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -170,7 +170,7 @@ static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)  static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)  {  	unsigned int t = inst->Alpha.Arg[i].Source; -	t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; +	t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;  	t |= inst->Alpha.Arg[i].Negate << 5;  	t |= inst->Alpha.Arg[i].Abs << 6;  	return t; @@ -372,7 +372,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst  		| (inst->DstReg.WriteMask << 11)  		| R500_INST_TEX_SEM_WAIT;  	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) -		| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; +		| R500_TEX_SEM_ACQUIRE;  	if (inst->TexSrcTarget == RC_TEXTURE_RECT)  		code->inst[ip].inst1 |= R500_TEX_UNSCALED; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index b69e81698a..35360aa70f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -31,6 +31,9 @@  #define R300_PFS_NUM_TEMP_REGS    32  #define R300_PFS_NUM_CONST_REGS   32 +#define R400_PFS_MAX_ALU_INST     512 +#define R400_PFS_MAX_TEX_INST     512 +  #define R500_PFS_MAX_INST         512  #define R500_PFS_NUM_TEMP_REGS    128  #define R500_PFS_NUM_CONST_REGS   256 @@ -170,6 +173,8 @@ struct r300_fragment_program_external_state {  		 * RC_STATE_R300_TEXSCALE_FACTOR. */  		unsigned clamp_and_scale_before_fetch : 1;  	} unit[16]; + +	unsigned frag_clamp:1;  }; @@ -187,24 +192,29 @@ struct r300_fragment_program_node {   */  struct r300_fragment_program_code {  	struct { -		int length; /**< total # of texture instructions used */ -		uint32_t inst[R300_PFS_MAX_TEX_INST]; +		unsigned int length; /**< total # of texture instructions used */ +		uint32_t inst[R400_PFS_MAX_TEX_INST];  	} tex;  	struct { -		int length; /**< total # of ALU instructions used */ +		unsigned int length; /**< total # of ALU instructions used */  		struct {  			uint32_t rgb_inst;  			uint32_t rgb_addr;  			uint32_t alpha_inst;  			uint32_t alpha_addr; -		} inst[R300_PFS_MAX_ALU_INST]; +			uint32_t r400_ext_addr; +		} inst[R400_PFS_MAX_ALU_INST];  	} alu;  	uint32_t config; /* US_CONFIG */  	uint32_t pixsize; /* US_PIXSIZE */  	uint32_t code_offset; /* US_CODE_OFFSET */ +	uint32_t r400_code_offset_ext; /* US_CODE_EXT */  	uint32_t code_addr[4]; /* US_CODE_ADDR */ +	/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries +	 * for r400 cards */ +	unsigned int r390_mode:1;  }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 65548604bc..79cd7996f7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -373,9 +373,11 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)  		const struct rc_opcode_info * info;  		rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);  		if (tmp->Type == RC_INSTRUCTION_NORMAL) { +			info = rc_get_opcode_info(tmp->U.I.Opcode); +			if (info->Opcode == RC_OPCODE_BEGIN_TEX) +				continue;  			if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)  				s->num_presub_ops++; -			info = rc_get_opcode_info(tmp->U.I.Opcode);  		} else {  			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)  				s->num_presub_ops++; @@ -402,11 +404,11 @@ static void print_stats(struct radeon_compiler * c)  {  	struct rc_program_stats s; -	rc_get_stats(c, &s); - -	if (s.num_insts < 4) +	if (c->initial_num_insts <= 5)  		return; +	rc_get_stats(c, &s); +  	switch (c->type) {  	case RC_VERTEX_PROGRAM:  		fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" @@ -461,6 +463,11 @@ void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pa  /* Executes a list of compiler passes given in the parameter 'list'. */  void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)  { +	struct rc_program_stats s; + +	rc_get_stats(c, &s); +	c->initial_num_insts = s.num_insts; +  	if (c->Debug & RC_DBG_LOG) {  		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);  		rc_print_program(&c->Program); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index e663339589..2d8e415f35 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -50,6 +50,7 @@ struct radeon_compiler {  	char * ErrorMsg;  	/* Hardware specification. */ +	unsigned is_r400:1;  	unsigned is_r500:1;  	unsigned has_half_swizzles:1;  	unsigned has_presub:1; @@ -57,6 +58,7 @@ struct radeon_compiler {  	unsigned max_temp_regs;  	unsigned max_constants;  	int max_alu_insts; +	unsigned max_tex_insts;  	/* Whether to remove unused constants and empty holes in constant space. */  	unsigned remove_unused_constants:1; @@ -70,6 +72,8 @@ struct radeon_compiler {  	/*@}*/  	struct emulate_loop_state loop_state; + +	unsigned initial_num_insts; /* Number of instructions at start. */  };  void rc_init(struct radeon_compiler * c); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index bf393a9fb1..15ec4418cb 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -55,6 +55,24 @@ rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)  	return GET_SWZ(swz, idx);  } +/** + * The purpose of this function is to standardize the number channels used by + * swizzles.  All swizzles regardless of what instruction they are a part of + * should have 4 channels initialized with values. + * @param channels The number of channels in initial_value that have a + * meaningful value. + * @return An initialized swizzle that has all of the unused channels set to + * RC_SWIZZLE_UNUSED. + */ +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) +{ +	unsigned int i; +	for (i = channels; i < 4; i++) { +		SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); +	} +	return initial_value; +} +  unsigned int combine_swizzles4(unsigned int src,  		rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)  { @@ -147,13 +165,17 @@ unsigned int rc_src_reads_dst_mask(  	return dst_mask & rc_swizzle_to_writemask(src_swz);  } -unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels) +/** + * @return A bit mask specifying whether this swizzle will select from an RGB + * source, an Alpha source, or both. + */ +unsigned int rc_source_type_swz(unsigned int swizzle)  {  	unsigned int chan;  	unsigned int swz = RC_SWIZZLE_UNUSED;  	unsigned int ret = RC_SOURCE_NONE; -	for(chan = 0; chan < channels; chan++) { +	for(chan = 0; chan < 4; chan++) {  		swz = GET_SWZ(swizzle, chan);  		if (swz == RC_SWIZZLE_W) {  			ret |= RC_SOURCE_ALPHA; @@ -202,7 +224,7 @@ static void can_use_presub_read_cb(  		if (d->RemoveSrcs[i].File == file  		    && d->RemoveSrcs[i].Index == index) {  			src_type &= -				~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4); +				~rc_source_type_swz(d->RemoveSrcs[i].Swizzle);  		}  	} @@ -223,7 +245,6 @@ unsigned int rc_inst_can_use_presub(  {  	struct can_use_presub_data d;  	unsigned int num_presub_srcs; -	unsigned int presub_src_type = rc_source_type_mask(presub_writemask);  	const struct rc_opcode_info * info =  					rc_get_opcode_info(inst->U.I.Opcode); @@ -253,13 +274,7 @@ unsigned int rc_inst_can_use_presub(  	num_presub_srcs = rc_presubtract_src_reg_count(presub_op); -	if ((presub_src_type & RC_SOURCE_RGB) -					&& d.RGBCount + num_presub_srcs > 3) { -		return 0; -	} - -	if ((presub_src_type & RC_SOURCE_ALPHA) -					&& d.AlphaCount + num_presub_srcs > 3) { +	if (d.RGBCount + num_presub_srcs > 3 || d.AlphaCount + num_presub_srcs > 3) {  		return 0;  	} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index 461ab9ffb1..dd0f6c6615 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -10,6 +10,8 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz);  rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); +  unsigned int combine_swizzles4(unsigned int src,  			       rc_swizzle swz_x, rc_swizzle swz_y,  			       rc_swizzle swz_z, rc_swizzle swz_w); @@ -32,7 +34,7 @@ unsigned int rc_src_reads_dst_mask(  		unsigned int dst_idx,  		unsigned int dst_mask); -unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels); +unsigned int rc_source_type_swz(unsigned int swizzle);  unsigned int rc_source_type_mask(unsigned int mask); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index d0a64d936e..c080d5aecc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -140,14 +140,8 @@ static void pair_sub_for_all_args(  	for(i = 0; i < info->NumSrcRegs; i++) {  		unsigned int src_type; -		unsigned int channels = 0; -		if (&fullinst->U.P.RGB == sub) -			channels = 3; -		else if (&fullinst->U.P.Alpha == sub) -			channels = 1; -		assert(channels > 0); -		src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels); +		src_type = rc_source_type_swz(sub->Arg[i].Swizzle);  		if (src_type == RC_SOURCE_NONE)  			continue; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index 87906f37b1..678e147588 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -160,12 +160,8 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction  		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);  		if (pused) {  			usedmask = *pused & inst->U.I.DstReg.WriteMask; -			if (!inst->U.I.DstReg.RelAddr) -				*pused &= ~usedmask; +			*pused &= ~usedmask;  		} - -		if (inst->U.I.DstReg.RelAddr) -			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);  	}  	insts->WriteMask |= usedmask; @@ -219,22 +215,9 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)  {  	struct deadcode_state s;  	unsigned int nr_instructions; -	unsigned has_temp_reladdr_src = 0;  	rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;  	unsigned int ip; -	/* Give up if there is relative addressing of destination operands. */ -	for(struct rc_instruction * inst = c->Program.Instructions.Next; -	    inst != &c->Program.Instructions; -	    inst = inst->Next) { -		const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); -		if (opcode->HasDstReg && -		    inst->U.I.DstReg.WriteMask && -		    inst->U.I.DstReg.RelAddr) { -			return; -		} -	} -  	memset(&s, 0, sizeof(s));  	s.C = c; @@ -321,32 +304,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)  					rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);  				}  			} - -			if (!has_temp_reladdr_src) { -				for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { -					if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && -					    inst->U.I.SrcReg[i].RelAddr) { -						/* If there is a register read from a temporary file with relative addressing, -						 * mark all preceding written registers as used. */ -						for (struct rc_instruction *ptr = inst->Prev; -						     ptr != &c->Program.Instructions; -						     ptr = ptr->Prev) { -							opcode = rc_get_opcode_info(ptr->U.I.Opcode); -							if (opcode->HasDstReg && -							    ptr->U.I.DstReg.File == RC_FILE_TEMPORARY && -							    ptr->U.I.DstReg.WriteMask) { -								mark_used(&s, -									  ptr->U.I.DstReg.File, -									  ptr->U.I.DstReg.Index, -									  ptr->U.I.DstReg.WriteMask); -							} -						} - -						has_temp_reladdr_src = 1; -						break; -					} -				} -			}  		}  		update_instruction(&s, inst); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c index a0f7bd8174..133a9f72ec 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -56,6 +56,7 @@ static void rewrite_source(struct radeon_compiler * c,  		mov->U.I.DstReg.Index = tempreg;  		mov->U.I.DstReg.WriteMask = split.Phase[phase];  		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; +		mov->U.I.PreSub = inst->U.I.PreSub;  		phase_refmask = 0;  		for(unsigned int chan = 0; chan < 4; ++chan) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 44f4c0fbdc..c4e6a5e0a1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -139,7 +139,6 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i  	unsigned int i;  	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || -	    inst_mov->U.I.DstReg.RelAddr ||  	    inst_mov->U.I.WriteALUResult ||  	    inst_mov->U.I.SaturateMode)  		return; @@ -312,7 +311,18 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *  		struct rc_constant * constant;  		struct rc_src_register newsrc;  		int have_real_reference; +		unsigned int chan; +		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ +		for (chan = 0; chan < 4; ++chan) +			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) +				break; +		if (chan == 4) { +			inst->U.I.SrcReg[src].File = RC_FILE_NONE; +			continue; +		} + +		/* Convert immediates to swizzles. */  		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||  		    inst->U.I.SrcReg[src].RelAddr ||  		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) @@ -326,7 +336,7 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *  		newsrc = inst->U.I.SrcReg[src];  		have_real_reference = 0; -		for(unsigned int chan = 0; chan < 4; ++chan) { +		for (chan = 0; chan < 4; ++chan) {  			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);  			unsigned int newswz;  			float imm; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 9beb5d6357..8e10813ff0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -365,8 +365,8 @@ static int merge_presub_sources(  		for(arg = 0; arg < info->NumSrcRegs; arg++) {  			/*If this arg does not read from an rgb source,  			 * do nothing. */ -			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle, -								3) & type)) { +			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) +								& type)) {  				continue;  			} @@ -423,11 +423,11 @@ static int destructive_merge_instructions(  		unsigned int index = 0;  		int source; -		if (alpha->Alpha.Arg[arg].Swizzle < 3) { +		if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {  			srcrgb = 1;  			file = alpha->RGB.Src[oldsrc].File;  			index = alpha->RGB.Src[oldsrc].Index; -		} else if (alpha->Alpha.Arg[arg].Swizzle < 4) { +		} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {  			srcalpha = 1;  			file = alpha->Alpha.Src[oldsrc].File;  			index = alpha->Alpha.Src[oldsrc].Index; @@ -544,18 +544,12 @@ static void rgb_to_alpha_remap (  {  	int new_src_index;  	unsigned int i; -	struct rc_pair_instruction_source * old_src = -					rc_pair_get_src(&inst->U.P, arg); -	if (!old_src) { -		return; -	}  	for (i = 0; i < 3; i++) {  		if (get_swz(arg->Swizzle, i) == old_swz) {  			SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);  		}  	} -	memset(old_src, 0, sizeof(struct rc_pair_instruction_source));  	new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,  							old_file, new_index);  	/* This conversion is not possible, we must have made a mistake in @@ -728,7 +722,8 @@ static int convert_rgb_to_alpha(  		for (j = 0; j < 3; j++) {  			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);  			if (swz != RC_SWIZZLE_UNUSED) { -				pair_inst->Alpha.Arg[i].Swizzle = swz; +				pair_inst->Alpha.Arg[i].Swizzle = +							rc_init_swizzle(swz, 1);  				break;  			}  		} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index fc05366f50..9e03eb1aca 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -28,6 +28,7 @@  #include "radeon_program_pair.h"  #include "radeon_compiler.h" +#include "radeon_compiler_util.h"  /** @@ -213,16 +214,21 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,  		if (needrgb && !istranscendent) {  			unsigned int srcrgb = 0;  			unsigned int srcalpha = 0; +			unsigned int srcmask = 0;  			int j;  			/* We don't care about the alpha channel here.  We only  			 * want the part of the swizzle that writes to rgb,  			 * since we are creating an rgb instruction. */  			for(j = 0; j < 3; ++j) {  				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); -				if (swz < 3) + +				if (swz < RC_SWIZZLE_W)  					srcrgb = 1; -				else if (swz < 4) +				else if (swz == RC_SWIZZLE_W)  					srcalpha = 1; + +				if (swz < RC_SWIZZLE_UNUSED) +					srcmask |= 1 << j;  			}  			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,  							inst->SrcReg[i].File, inst->SrcReg[i].Index); @@ -232,9 +238,10 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,  				return;  			}  			pair->RGB.Arg[i].Source = source; -			pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; +			pair->RGB.Arg[i].Swizzle = +				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);  			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; -			pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); +			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));  		}  		if (needalpha) {  			unsigned int srcrgb = 0; @@ -252,7 +259,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,  				return;  			}  			pair->Alpha.Arg[i].Source = source; -			pair->Alpha.Arg[i].Swizzle = swz; +			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);  			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;  			pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);  		} @@ -302,12 +309,6 @@ static void check_opcode_support(struct r300_fragment_program_compiler *c,  	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);  	if (opcode->HasDstReg) { -		if (inst->DstReg.RelAddr) { -			rc_error(&c->Base, "Fragment program does not support relative addressing " -				 "of destination operands.\n"); -			return; -		} -  		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {  			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");  			return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index df6c94b35f..a07f6b63c6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -56,11 +56,7 @@ struct rc_src_register {  struct rc_dst_register {  	unsigned int File:3; - -	/** Negative values may be used for relative addressing. */ -	signed int Index:(RC_REGISTER_INDEX_BITS+1); -	unsigned int RelAddr:1; - +	unsigned int Index:RC_REGISTER_INDEX_BITS;  	unsigned int WriteMask:4;  }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 58977a40c7..9fc991166a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -91,7 +91,6 @@ static struct rc_dst_register dstregtmpmask(int index, int mask)  	dst.File = RC_FILE_TEMPORARY;  	dst.Index = index;  	dst.WriteMask = mask; -	dst.RelAddr = 0;  	return dst;  } @@ -689,11 +688,12 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,  							 &constant_swizzle);  	/* MOV dst, src */ +	dst.WriteMask = RC_MASK_XYZW;  	emit1(c, inst->Prev, RC_OPCODE_MOV, 0,  		dst,  		inst->U.I.SrcReg[0]); -	/* MAX dst.z, src, 0.00...001 */ +	/* MAX dst.y, src, 0.00...001 */  	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,  		dstregtmpmask(dst.Index, RC_MASK_Y),  		srcreg(RC_FILE_TEMPORARY, dst.Index), diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 5905d26e52..68874795b8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -211,27 +211,9 @@ struct rc_pair_instruction_source * rc_pair_get_src(  	struct rc_pair_instruction * pair_inst,  	struct rc_pair_instruction_arg * arg)  { -	unsigned int i, type; -	unsigned int channels = 0; +	unsigned int type; -	for(i = 0; i < 3; i++) { -		if (arg == pair_inst->RGB.Arg + i) { -			channels = 3; -			break; -		} -	} - -	if (channels == 0) { -		for (i = 0; i < 3; i++) { -			if (arg == pair_inst->Alpha.Arg + i) { -				channels = 1; -				break; -			} -		} -	} - -	assert(channels > 0); -	type = rc_source_type_swz(arg->Swizzle, channels); +	type = rc_source_type_swz(arg->Swizzle);  	if (type & RC_SOURCE_RGB) {  		return &pair_inst->RGB.Src[arg->Source]; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index ccf7a0070c..6708b16d29 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -63,7 +63,7 @@ struct rc_pair_instruction_source {  struct rc_pair_instruction_arg {  	unsigned int Source:2; -	unsigned int Swizzle:9; +	unsigned int Swizzle:12;  	unsigned int Abs:1;  	unsigned int Negate:1;  }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index ae13f6742f..390d131946 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -110,7 +110,7 @@ static void rc_print_mask(FILE * f, unsigned int mask)  static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)  { -	rc_print_register(f, dst.File, dst.Index, dst.RelAddr); +	rc_print_register(f, dst.File, dst.Index, 0);  	if (dst.WriteMask != RC_MASK_XYZW) {  		fprintf(f, ".");  		rc_print_mask(f, dst.WriteMask); @@ -379,7 +379,7 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst  			else  				fprintf(f,"%d", inst->Alpha.Arg[arg].Source);  			fprintf(f,".%c%s", -				rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs); +				rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);  		}  		fprintf(f, "\n");  	} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index f9d9f34b6a..1cf77d9cf7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -33,14 +33,14 @@  /* Series of transformations to be done on textures. */  static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, -											 int tmu) +					     int tmu)  {  	struct rc_src_register reg = { 0, };  	if (compiler->enable_shadow_ambient) {  		reg.File = RC_FILE_CONSTANT;  		reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, -										   RC_STATE_SHADOW_AMBIENT, tmu); +						   RC_STATE_SHADOW_AMBIENT, tmu);  		reg.Swizzle = RC_SWIZZLE_WWWW;  	} else {  		reg.File = RC_FILE_NONE; @@ -149,14 +149,11 @@ int radeonTransformTEX(  			return 1;  		} else { -			rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;  			struct rc_instruction * inst_rcp = NULL; -			struct rc_instruction * inst_mad; -			struct rc_instruction * inst_cmp; +			struct rc_instruction *inst_mul, *inst_add, *inst_cmp;  			unsigned tmp_texsample;  			unsigned tmp_sum; -			unsigned tmp_recip_w = 0; -			int pass, fail, tex; +			int pass, fail;  			/* Save the output register. */  			struct rc_dst_register output_reg = inst->U.I.DstReg; @@ -167,63 +164,68 @@ int radeonTransformTEX(  			inst->U.I.DstReg.Index = tmp_texsample;  			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; -			if (inst->U.I.Opcode == RC_OPCODE_TXP) { -				tmp_recip_w = rc_find_free_temporary(c); +			tmp_sum = rc_find_free_temporary(c); +			if (inst->U.I.Opcode == RC_OPCODE_TXP) {  				/* Compute 1/W. */  				inst_rcp = rc_insert_new_instruction(c, inst);  				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;  				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; -				inst_rcp->U.I.DstReg.Index = tmp_recip_w; +				inst_rcp->U.I.DstReg.Index = tmp_sum;  				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;  				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];  				inst_rcp->U.I.SrcReg[0].Swizzle =  					RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));  			} -			/* Perspective-divide Z by W (if it's TXP) and add the texture sample (see below). */ -			tmp_sum = rc_find_free_temporary(c); -			inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); -			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; -			inst_mad->U.I.DstReg.Index = tmp_sum; -			inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; -			inst_mad->U.I.SrcReg[0].Swizzle = +			/* Divide Z by W (if it's TXP) and saturate. */ +			inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); +			inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; +			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; +			inst_mul->U.I.DstReg.Index = tmp_sum; +			inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; +			inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; +			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; +			inst_mul->U.I.SrcReg[0].Swizzle =  				RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));  			if (inst->U.I.Opcode == RC_OPCODE_TXP) { -				inst_mad->U.I.Opcode = RC_OPCODE_MAD; -				inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; -				inst_mad->U.I.SrcReg[1].Index = tmp_recip_w; -				inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; -				tex = 2; -			} else { -				inst_mad->U.I.Opcode = RC_OPCODE_ADD; -				tex = 1; +				inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; +				inst_mul->U.I.SrcReg[1].Index = tmp_sum; +				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;  			} -			inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY; -			inst_mad->U.I.SrcReg[tex].Index = tmp_texsample; -			inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle; -			/* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */ -			if (comparefunc == RC_COMPARE_FUNC_EQUAL) { -				comparefunc = RC_COMPARE_FUNC_GEQUAL; -			} else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { -				comparefunc = RC_COMPARE_FUNC_LESS; -			} +			/* Add the depth texture value. */ +			inst_add = rc_insert_new_instruction(c, inst_mul); +			inst_add->U.I.Opcode = RC_OPCODE_ADD; +			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; +			inst_add->U.I.DstReg.Index = tmp_sum; +			inst_add->U.I.DstReg.WriteMask = RC_MASK_W; +			inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; +			inst_add->U.I.SrcReg[0].Index = tmp_sum; +			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; +			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; +			inst_add->U.I.SrcReg[1].Index = tmp_texsample; +			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; -			/* Recall that SrcReg[0] is r, SrcReg[tex] is tex and: +			/* Note that SrcReg[0] is r, SrcReg[1] is tex and:  			 *   LESS:    r  < tex  <=>      -tex+r < 0  			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)  			 *   GREATER: r  > tex  <=>       tex-r < 0  			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0) -			 * -			 * This negates either r or tex: */ -			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) -				inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW; +			 *   EQUAL:   GEQUAL +			 *   NOTEQUAL:LESS +			 */ + +			/* This negates either r or tex: */ +			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || +			    comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) +				inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;  			else -				inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; +				inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;  			/* This negates the whole expresion: */ -			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { +			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || +			    comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {  				pass = 1;  				fail = 2;  			} else { @@ -231,16 +233,19 @@ int radeonTransformTEX(  				fail = 1;  			} -			inst_cmp = rc_insert_new_instruction(c, inst_mad); +			inst_cmp = rc_insert_new_instruction(c, inst_add);  			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;  			inst_cmp->U.I.DstReg = output_reg;  			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;  			inst_cmp->U.I.SrcReg[0].Index = tmp_sum; +			inst_cmp->U.I.SrcReg[0].Swizzle = +					combine_swizzles(RC_SWIZZLE_WWWW, +							 compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle);  			inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;  			inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;  			inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); -			assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w); +			assert(tmp_texsample != tmp_sum);  		}  	} @@ -420,17 +425,21 @@ int radeonTransformTEX(  		scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);  	} -	/* Cannot write texture to output registers (all chips) or with masks (non-r500) */ +	/* Cannot write texture to output registers or with saturate (all chips), +	 * or with masks (non-r500). */  	if (inst->U.I.Opcode != RC_OPCODE_KIL &&  		(inst->U.I.DstReg.File != RC_FILE_TEMPORARY || +		 inst->U.I.SaturateMode ||  		 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {  		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);  		inst_mov->U.I.Opcode = RC_OPCODE_MOV; +		inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;  		inst_mov->U.I.DstReg = inst->U.I.DstReg;  		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;  		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); +		inst->U.I.SaturateMode = 0;  		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;  		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;  		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 88165f7895..5bd19c0b9c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -51,6 +51,14 @@ void rc_rename_regs(struct radeon_compiler *c, void *user)  	struct rc_reader_data reader_data;  	unsigned char * used; +	/* XXX Remove this once the register allocation works with flow control. */ +	for(inst = c->Program.Instructions.Next; +					inst != &c->Program.Instructions; +					inst = inst->Next) { +		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) +			return; +	} +  	used_length = 2 * rc_recompute_ips(c);  	used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);  	memset(used, 0, sizeof(unsigned char) * used_length); | 
