diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/mesa/drivers/dri/r300/r300_reg.h | 6 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/r300/r500_fragprog.c | 9 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 1218 | 
3 files changed, 458 insertions, 775 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 58a19554c7..cd232c5b7b 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -2705,6 +2705,7 @@ enum {  #   define R500_ALPHA_OP_MDV				15  #   define R500_ALPHA_ADDRD(x)				(x << 4)  #   define R500_ALPHA_ADDRD_REL				(1 << 11) +#  define R500_ALPHA_SEL_A_SHIFT			12  #   define R500_ALPHA_SEL_A_SRC0			(0 << 12)  #   define R500_ALPHA_SEL_A_SRC1			(1 << 12)  #   define R500_ALPHA_SEL_A_SRC2			(2 << 12) @@ -2721,6 +2722,7 @@ enum {  #   define R500_ALPHA_MOD_A_NEG				(1 << 17)  #   define R500_ALPHA_MOD_A_ABS				(2 << 17)  #   define R500_ALPHA_MOD_A_NAB				(3 << 17) +#  define R500_ALPHA_SEL_B_SHIFT			19  #   define R500_ALPHA_SEL_B_SRC0			(0 << 19)  #   define R500_ALPHA_SEL_B_SRC1			(1 << 19)  #   define R500_ALPHA_SEL_B_SRC2			(2 << 19) @@ -2777,6 +2779,7 @@ enum {  #   define R500_ALU_RGBA_OP_MDV				(12 << 0)  #   define R500_ALU_RGBA_ADDRD(x)			(x << 4)  #   define R500_ALU_RGBA_ADDRD_REL			(1 << 11) +#  define R500_ALU_RGBA_SEL_C_SHIFT			12  #   define R500_ALU_RGBA_SEL_C_SRC0			(0 << 12)  #   define R500_ALU_RGBA_SEL_C_SRC1			(1 << 12)  #   define R500_ALU_RGBA_SEL_C_SRC2			(2 << 12) @@ -2809,6 +2812,7 @@ enum {  #   define R500_ALU_RGBA_MOD_C_NEG			(1 << 23)  #   define R500_ALU_RGBA_MOD_C_ABS			(2 << 23)  #   define R500_ALU_RGBA_MOD_C_NAB			(3 << 23) +#  define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT		25  #   define R500_ALU_RGBA_ALPHA_SEL_C_SRC0		(0 << 25)  #   define R500_ALU_RGBA_ALPHA_SEL_C_SRC1		(1 << 25)  #   define R500_ALU_RGBA_ALPHA_SEL_C_SRC2		(2 << 25) @@ -2826,6 +2830,7 @@ enum {  #   define R500_ALU_RGBA_ALPHA_MOD_C_ABS		(2 << 30)  #   define R500_ALU_RGBA_ALPHA_MOD_C_NAB		(3 << 30)  #define R500_US_ALU_RGB_INST_0				0xa000 +#  define R500_ALU_RGB_SEL_A_SHIFT			0  #   define R500_ALU_RGB_SEL_A_SRC0			(0 << 0)  #   define R500_ALU_RGB_SEL_A_SRC1			(1 << 0)  #   define R500_ALU_RGB_SEL_A_SRC2			(2 << 0) @@ -2858,6 +2863,7 @@ enum {  #   define R500_ALU_RGB_MOD_A_NEG			(1 << 11)  #   define R500_ALU_RGB_MOD_A_ABS			(2 << 11)  #   define R500_ALU_RGB_MOD_A_NAB			(3 << 11) +#  define R500_ALU_RGB_SEL_B_SHIFT			13  #   define R500_ALU_RGB_SEL_B_SRC0			(0 << 13)  #   define R500_ALU_RGB_SEL_B_SRC1			(1 << 13)  #   define R500_ALU_RGB_SEL_B_SRC2			(2 << 13) diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 62e06ea52c..b46e924ac7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -27,6 +27,8 @@  #include "r500_fragprog.h" +#include "radeon_program_alu.h" +  /**   * Transform TEX, TXP, TXB, and KIL instructions in the following way: @@ -316,11 +318,12 @@ void r500TranslateFragmentShader(r300ContextPtr r300,  		insert_WPOS_trailer(&compiler); -		struct radeon_program_transformation transformations[1] = { -			{ &transform_TEX, &compiler } +		struct radeon_program_transformation transformations[2] = { +			{ &transform_TEX, &compiler }, +			{ &radeonTransformALU, 0 }  		};  		radeonLocalTransform(r300->radeon.glCtx, compiler.program, -			1, transformations); +			2, transformations);  		if (RADEON_DEBUG & DEBUG_PIXEL) {  			_mesa_printf("Compiler: after all transformations:\n"); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 67545cbb4f..0e95c81e48 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -168,6 +168,12 @@ static const GLfloat LIT[] = {127.999999,  	127.999999,  	-127.999999}; +static const struct prog_dst_register dstreg_template = { +	.File = PROGRAM_TEMPORARY, +	.Index = 0, +	.WriteMask = WRITEMASK_XYZW +}; +  static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {  	GLuint swiz = 0x0;  	GLuint temp; @@ -179,8 +185,14 @@ static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {  		if (temp == 5) temp++;  		swiz |= temp << i*3;  	} -	if (src.NegateBase) -		swiz |= (R500_SWIZ_MOD_NEG << 9); +	if (src.Abs) { +		swiz |= R500_SWIZ_MOD_ABS << 9; +	} else if (src.NegateBase & 7) { +		ASSERT((src.NegateBase & 7) == 7); +		swiz |= R500_SWIZ_MOD_NEG << 9; +	} +	if (src.NegateAbs) +		swiz ^= R500_SWIZ_MOD_NEG << 9;  	return swiz;  } @@ -202,8 +214,13 @@ static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {  	if (swiz == 5) swiz++; -	if (src.NegateBase) -		swiz |= (R500_SWIZ_MOD_NEG << 3); +	if (src.Abs) { +		swiz |= R500_SWIZ_MOD_ABS << 3; +	} else if (src.NegateBase & 8) { +		swiz |= R500_SWIZ_MOD_NEG << 3; +	} +	if (src.NegateAbs) +		swiz ^= R500_SWIZ_MOD_NEG << 3;  	return swiz;  } @@ -212,6 +229,15 @@ static INLINE GLuint make_sop_swizzle(struct prog_src_register src) {  	GLuint swiz = GET_SWZ(src.Swizzle, 0);  	if (swiz == 5) swiz++; + +	if (src.Abs) { +		swiz |= R500_SWIZ_MOD_ABS << 3; +	} else if (src.NegateBase & 1) { +		swiz |= R500_SWIZ_MOD_NEG << 3; +	} +	if (src.NegateAbs) +		swiz ^= R500_SWIZ_MOD_NEG << 3; +  	return swiz;  } @@ -324,12 +350,23 @@ static GLuint make_dest(struct r500_pfs_compile_state *cs, struct prog_dst_regis  	return reg;  } -static void emit_tex(struct r500_pfs_compile_state *cs, -		     struct prog_instruction *fpi, int dest, int counter) +static int emit_slot(struct r500_pfs_compile_state *cs) +{ +	if (cs->nrslots >= 512) { +		ERROR("Too many instructions"); +		cs->nrslots = 1; +		return 0; +	} +	return cs->nrslots++; +} + +static int emit_tex(struct r500_pfs_compile_state *cs, +		     struct prog_instruction *fpi, int dest)  {  	PROG_CODE;  	int hwsrc, hwdest;  	GLuint mask; +	int counter = emit_slot(cs);  	mask = fpi->DstReg.WriteMask << 11;  	hwsrc = make_src(cs, fpi->SrcReg[0]); @@ -399,844 +436,490 @@ static void emit_tex(struct r500_pfs_compile_state *cs,  			| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)  			| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);  	} + +	return counter;  } -static void emit_alu(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi) { +/* Do not call directly */ +static int _helper_emit_alu(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, +	int File, int Index, int WriteMask) +{  	PROG_CODE; -	/* Ideally, we shouldn't have to explicitly clear memory here! */ -	code->inst[counter].inst0 = 0x0; -	code->inst[counter].inst1 = 0x0; -	code->inst[counter].inst2 = 0x0; -	code->inst[counter].inst3 = 0x0; -	code->inst[counter].inst4 = 0x0; -	code->inst[counter].inst5 = 0x0; +	int counter = emit_slot(cs); -	if (fpi->DstReg.File == PROGRAM_OUTPUT) { -		code->inst[counter].inst0 = R500_INST_TYPE_OUT; +	code->inst[counter].inst4 = alphaop; +	code->inst[counter].inst5 = rgbop; -		if (fpi->DstReg.Index == FRAG_RESULT_COLR) -			code->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15); +	if (File == PROGRAM_OUTPUT) { +		code->inst[counter].inst0 = R500_INST_TYPE_OUT; -		if (fpi->DstReg.Index == FRAG_RESULT_DEPR) { +		if (Index == FRAG_RESULT_COLR) { +			code->inst[counter].inst0 |= WriteMask << 15; +		} else if (Index == FRAG_RESULT_DEPR) {  			code->inst[counter].inst4 |= R500_ALPHA_W_OMASK; -			/* Notify the state emission! */  			cs->compiler->fp->writes_depth = GL_TRUE;  		}  	} else { +		int dest = Index + code->temp_reg_offset; +  		code->inst[counter].inst0 = R500_INST_TYPE_ALU -			/* pixel_mask */ -			| (fpi->DstReg.WriteMask << 11); +			| (WriteMask << 11); +		code->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest); +		code->inst[counter].inst5 |= R500_ALU_RGBA_ADDRD(dest);  	}  	code->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT; + +	return counter; +} + +/** + * Prepare an ALU slot with the given RGB operation, ALPHA operation, and + * destination register. + */ +static int emit_alu(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, struct prog_dst_register dst) +{ +	return _helper_emit_alu(cs, rgbop, alphaop, dst.File, dst.Index, dst.WriteMask);  } -static void emit_mov(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) { +static int emit_alu_temp(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, int dst, int writemask) +{ +	return _helper_emit_alu(cs, rgbop, alphaop, +		PROGRAM_TEMPORARY, dst - cs->compiler->code->temp_reg_offset, writemask); +} + +/** + * Set an instruction's source 0 (both RGB and ALPHA) to the given hardware index. + */ +static void set_src0_direct(struct r500_pfs_compile_state *cs, int ip, GLuint src) +{  	PROG_CODE; -	/* The r3xx shader uses MAD to implement MOV. We are using CMP, since -	 * it is technically more accurate and recommended by ATI/AMD. */ -	emit_alu(cs, counter, fpi); -	code->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); -	code->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); -	/* (De)mangle the swizzle from Mesa to R500. */ -	swizzle = make_rgba_swizzle(swizzle); -	/* 0x1FF is 9 bits, size of an RGB swizzle. */ -	code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -		| MAKE_SWIZ_RGB_A((swizzle & 0x1ff)) -		| R500_ALU_RGB_SEL_B_SRC0 -		| MAKE_SWIZ_RGB_B((swizzle & 0x1ff)) -		| R500_ALU_RGB_OMOD_DISABLE; -	code->inst[counter].inst4 |= R500_ALPHA_OP_CMP -		| R500_ALPHA_ADDRD(dest) -		| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3)) -		| R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3)) -		| R500_ALPHA_OMOD_DISABLE; -	code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP -		| R500_ALU_RGBA_ADDRD(dest) -		| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -		| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); +	code->inst[ip].inst1 |= R500_RGB_ADDR0(src); +	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(src);  } -static void emit_mad(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int one, int two, int three) { +/** + * Set an instruction's source 1 (both RGB and ALPHA) to the given hardware index. + */ +static void set_src1_direct(struct r500_pfs_compile_state *cs, int ip, GLuint src) +{  	PROG_CODE; -	/* Note: This code was all Corbin's. Corbin is a rather hackish coder. -	 * If you can make it pretty or fast, please do so! */ -	emit_alu(cs, counter, fpi); -	/* Common MAD stuff */ -	code->inst[counter].inst4 |= R500_ALPHA_OP_MAD -		| R500_ALPHA_ADDRD(make_dest(cs, fpi->DstReg)); -	code->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD -		| R500_ALU_RGBA_ADDRD(make_dest(cs, fpi->DstReg)); -	switch (one) { -		case 0: -		case 1: -		case 2: -			code->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(cs, fpi->SrcReg[one])); -			code->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(cs, fpi->SrcReg[one])); -			code->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one])); -			code->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0 -				| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one])); -			break; -		case R500_SWIZZLE_ZERO: -			code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO); -			code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO); -			break; -		case R500_SWIZZLE_ONE: -			code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE); -			code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE); -			break; -		default: -			ERROR("Bad src index in emit_mad: %d\n", one); -			break; -	} -	switch (two) { -		case 0: -		case 1: -		case 2: -			code->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(cs, fpi->SrcReg[two])); -			code->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(cs, fpi->SrcReg[two])); -			code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 -				| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two])); -			code->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1 -				| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two])); -			break; -		case R500_SWIZZLE_ZERO: -			code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); -			code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); -			break; -		case R500_SWIZZLE_ONE: -			code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); -			code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); -			break; -		default: -			ERROR("Bad src index in emit_mad: %d\n", two); -			break; -	} -	switch (three) { -		case 0: -		case 1: -		case 2: -			code->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(cs, fpi->SrcReg[three])); -			code->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(cs, fpi->SrcReg[three])); -			code->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2 -				| MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three])) -				| R500_ALU_RGBA_ALPHA_SEL_C_SRC2 -				| MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three])); -			break; -		case R500_SWIZZLE_ZERO: -			code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -				| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -			break; -		case R500_SWIZZLE_ONE: -			code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE) -			| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE); -			break; -		default: -			ERROR("Bad src index in emit_mad: %d\n", three); -			break; -	} +	code->inst[ip].inst1 |= R500_RGB_ADDR1(src); +	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(src);  } -static void emit_sop(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int opcode, GLuint src, GLuint swiz, GLuint dest) { +/** + * Set an instruction's source 2 (both RGB and ALPHA) to the given hardware index. + */ +static void set_src2_direct(struct r500_pfs_compile_state *cs, int ip, GLuint src) +{  	PROG_CODE; -	emit_alu(cs, counter, fpi); -	code->inst[counter].inst1 = R500_RGB_ADDR0(src); -	code->inst[counter].inst2 = R500_ALPHA_ADDR0(src); -	code->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest) -		| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(swiz); -	code->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP -		| R500_ALU_RGBA_ADDRD(dest); -	switch (opcode) { -		case OPCODE_COS: -			code->inst[counter].inst4 |= R500_ALPHA_OP_COS; -			break; -		case OPCODE_EX2: -			code->inst[counter].inst4 |= R500_ALPHA_OP_EX2; -			break; -		case OPCODE_LG2: -			code->inst[counter].inst4 |= R500_ALPHA_OP_LN2; -			break; -		case OPCODE_RCP: -			code->inst[counter].inst4 |= R500_ALPHA_OP_RCP; -			break; -		case OPCODE_RSQ: -			code->inst[counter].inst4 |= R500_ALPHA_OP_RSQ; -			break; -		case OPCODE_SIN: -			code->inst[counter].inst4 |= R500_ALPHA_OP_SIN; -			break; -		default: -			ERROR("Bad opcode in emit_sop: %d\n", opcode); -			break; +	code->inst[ip].inst1 |= R500_RGB_ADDR2(src); +	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(src); +} + +/** + * Set an instruction's source 0 (both RGB and ALPHA) according to the given source register. + */ +static void set_src0(struct r500_pfs_compile_state *cs, int ip, struct prog_src_register srcreg) +{ +	set_src0_direct(cs, ip, make_src(cs, srcreg)); +} + +/** + * Set an instruction's source 1 (both RGB and ALPHA) according to the given source register. + */ +static void set_src1(struct r500_pfs_compile_state *cs, int ip, struct prog_src_register srcreg) +{ +	set_src1_direct(cs, ip, make_src(cs, srcreg)); +} + +/** + * Set an instruction's source 2 (both RGB and ALPHA) according to the given source register. + */ +static void set_src2(struct r500_pfs_compile_state *cs, int ip, struct prog_src_register srcreg) +{ +	set_src2_direct(cs, ip, make_src(cs, srcreg)); +} + +/** + * Set an instruction's argument A (both RGB and ALPHA) from the given source, + * taking swizzles+neg+abs as specified (see also _reg version below). + */ +static void set_argA(struct r500_pfs_compile_state *cs, int ip, int source, GLuint swizRGB, GLuint swizA) +{ +	PROG_CODE; +	code->inst[ip].inst3 |= (source << R500_ALU_RGB_SEL_A_SHIFT) | MAKE_SWIZ_RGB_A(swizRGB); +	code->inst[ip].inst4 |= (source << R500_ALPHA_SEL_A_SHIFT) | MAKE_SWIZ_ALPHA_A(swizA); +} + +/** + * Set an instruction's argument B (both RGB and ALPHA) from the given source, + * taking swizzles+neg+abs as specified (see also _reg version below). + */ +static void set_argB(struct r500_pfs_compile_state *cs, int ip, int source, GLuint swizRGB, GLuint swizA) +{ +	PROG_CODE; +	code->inst[ip].inst3 |= (source << R500_ALU_RGB_SEL_B_SHIFT) | MAKE_SWIZ_RGB_B(swizRGB); +	code->inst[ip].inst4 |= (source << R500_ALPHA_SEL_B_SHIFT) | MAKE_SWIZ_ALPHA_B(swizA); +} + +/** + * Set an instruction's argument C (both RGB and ALPHA) from the given source, + * taking swizzles+neg+abs as specified (see also _reg version below). + */ +static void set_argC(struct r500_pfs_compile_state *cs, int ip, int source, GLuint swizRGB, GLuint swizA) +{ +	PROG_CODE; +	code->inst[ip].inst5 |= +		(source << R500_ALU_RGBA_SEL_C_SHIFT) | +		MAKE_SWIZ_RGBA_C(swizRGB) | +		(source << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT) | +		MAKE_SWIZ_ALPHA_C(swizA); +} + +/** + * Set an instruction's argument A (both RGB and ALPHA) from the given source, + * taking swizzles, negation and absolute value from the given source register. + */ +static void set_argA_reg(struct r500_pfs_compile_state *cs, int ip, int source, struct prog_src_register srcreg) +{ +	set_argA(cs, ip, source, make_rgb_swizzle(srcreg), make_alpha_swizzle(srcreg)); +} + +/** + * Set an instruction's argument B (both RGB and ALPHA) from the given source, + * taking swizzles, negation and absolute value from the given source register. + */ +static void set_argB_reg(struct r500_pfs_compile_state *cs, int ip, int source, struct prog_src_register srcreg) +{ +	set_argB(cs, ip, source, make_rgb_swizzle(srcreg), make_alpha_swizzle(srcreg)); +} + +/** + * Set an instruction's argument C (both RGB and ALPHA) from the given source, + * taking swizzles, negation and absolute value from the given source register. + */ +static void set_argC_reg(struct r500_pfs_compile_state *cs, int ip, int source, struct prog_src_register srcreg) +{ +	set_argC(cs, ip, source, make_rgb_swizzle(srcreg), make_alpha_swizzle(srcreg)); +} + +/** + * Emit a special scalar operation. + */ +static int emit_sop(struct r500_pfs_compile_state *cs, +	int opcode, struct prog_dst_register dstreg, GLuint src, GLuint swiz) +{ +	int ip = emit_alu(cs, R500_ALU_RGBA_OP_SOP, opcode, dstreg); +	set_src0_direct(cs, ip, src); +	set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, swiz); +	return ip; +} + + +/** + * Emit trigonometric function COS, SIN, SCS + */ +static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) +{ +	int ip; +	struct prog_dst_register temp = dstreg_template; +	temp.Index = get_temp(cs, 0); +	temp.WriteMask = WRITEMASK_W; + +	/* temp = Input*(1/2pi) */ +	ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp); +	set_src0(cs, ip, fpi->SrcReg[0]); +	set_src1_direct(cs, ip, emit_const4fv(cs, RCP_2PI)); +	set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, make_sop_swizzle(fpi->SrcReg[0])); +	set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W); +	set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); + +	/* temp = frac(dst) */ +	ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, temp); +	set_src0_direct(cs, ip, temp.Index); +	set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W); + +	/* Dest = trig(temp) */ +	if (fpi->Opcode == OPCODE_COS) { +		emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W); +	} else if (fpi->Opcode == OPCODE_SIN) { +		emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W); +	} else if (fpi->Opcode == OPCODE_SCS) { +		struct prog_dst_register moddst = fpi->DstReg; + +		if (fpi->DstReg.WriteMask & WRITEMASK_X) { +			moddst.WriteMask = WRITEMASK_X; +			emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W); +		} +		if (fpi->DstReg.WriteMask & WRITEMASK_Y) { +			moddst.WriteMask = WRITEMASK_Y; +			emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W); +		} +	} +} + +/** + * Emit a LIT instruction. + * + * Definition of LIT (from ARB_fragment_program): + *  tmp = VectorLoad(op0); + *  if (tmp.x < 0) tmp.x = 0; + *  if (tmp.y < 0) tmp.y = 0; + *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + *  result.x = 1.0; + *  result.y = tmp.x; + *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + *  result.w = 1.0; + */ +static void emit_lit(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) +{ +	GLuint cnst; +	int needTemporary; +	GLuint temp; +	int ip; + +	cnst = emit_const4fv(cs, LIT); + +	needTemporary = 0; +	if (fpi->DstReg.WriteMask != WRITEMASK_XYZW || fpi->DstReg.File == PROGRAM_OUTPUT) +		needTemporary = 1; + +	if (needTemporary) { +		temp = get_temp(cs, 0); +	} else { +		temp = fpi->DstReg.Index; +	} + +	// MAX tmp.xyw, op0, { 0, 0, 0, -128+eps } +	ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAX, R500_ALPHA_OP_MAX, temp, WRITEMASK_XYW); +	set_src0(cs, ip, fpi->SrcReg[0]); +	set_src1_direct(cs, ip, cnst); +	set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); +	set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W); + +	// MIN tmp.z, tmp.w, { 128-eps } +	// LG2 tmp.w, tmp.y +	ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MIN, R500_ALPHA_OP_LN2, temp, WRITEMASK_ZW); +	set_src0_direct(cs, ip, temp); +	set_src1_direct(cs, ip, cnst); +	set_argA(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), SWIZZLE_Y); +	set_argB(cs, ip, 1, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_X); + +	// MOV tmp.y, tmp.x +	// MUL tmp.w, tmp.z, tmp.w +	ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp, WRITEMASK_YW); +	set_src0_direct(cs, ip, temp); +	set_argA(cs, ip, 0, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_Z); +	set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W); +	set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); + +	// MOV tmp.x, 1.0 +	// EX2 tmp.w, tmp.w +	ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_EX2, temp, WRITEMASK_XW); +	set_src0_direct(cs, ip, temp); +	set_argA(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W); +	set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ZERO); +	set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); + +	// tmp.z := (-tmp.x >= 0) ? tmp.y : 0.0 +	// MOV tmp.w, 1.0 +	ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, temp, WRITEMASK_ZW); +	set_src0_direct(cs, ip, temp); +	set_argA(cs, ip, 0, R500_SWIZZLE_ZERO, R500_SWIZZLE_ONE); +	set_argB(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), R500_SWIZZLE_ONE); +	set_argC(cs, ip, 0, SWIZZLE_Y | (SWIZZLE_Y<<3) | (SWIZZLE_Y<<6) | (R500_SWIZ_MOD_NEG<<9), R500_SWIZZLE_ZERO); + +	if (needTemporary) { +		ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg); +		set_src0_direct(cs, ip, temp); +		set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W); +		set_argB(cs, ip, 1, R500_SWIZ_RGB_RGB, SWIZZLE_W); +		set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);  	}  } -static int do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi, int counter) { +static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) {  	PROG_CODE;  	GLuint src[3], dest = 0; -	int temp_swiz = 0; +	int ip;  	if (fpi->Opcode != OPCODE_KIL) {  		dest = make_dest(cs, fpi->DstReg);  	}  	switch (fpi->Opcode) { -		case OPCODE_ABS: -			emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); -			code->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS -				| R500_ALU_RGB_MOD_B_ABS; -			code->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS -				| R500_ALPHA_MOD_B_ABS; -			break;  		case OPCODE_ADD:  			/* Variation on MAD: 1*src0+src1 */ -			emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_argA(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ONE); +			set_argB_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argC_reg(cs, ip, 1, fpi->SrcReg[1]);  			break;  		case OPCODE_CMP:  			/* This inst's selects need to be swapped as follows:  				* 0 -> C ; 1 -> B ; 2 -> A */ -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			src[2] = make_src(cs, fpi->SrcReg[2]); -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[2]) -				| R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2]) -				| R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2])) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst4 |= R500_ALPHA_OP_CMP -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2])) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP -				| R500_ALU_RGBA_ADDRD(dest) -				| R500_ALU_RGBA_SEL_C_SRC2 -				| MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGBA_ALPHA_SEL_C_SRC2 -				| MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_src2(cs, ip, fpi->SrcReg[2]); +			set_argA_reg(cs, ip, 2, fpi->SrcReg[2]); +			set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); +			set_argC_reg(cs, ip, 0, fpi->SrcReg[0]);  			break;  		case OPCODE_COS: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = emit_const4fv(cs, RCP_2PI); -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT -				| (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(get_temp(cs, 0)) -				| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A -				| R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -				| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -			counter++; -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); -			code->inst[counter].inst4 = R500_ALPHA_OP_FRC -				| R500_ALPHA_ADDRD(get_temp(cs, 1)) -				| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 1)); -			counter++; -			emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); +			emit_trig(cs, fpi);  			break;  		case OPCODE_DP3: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst4 |= R500_ALPHA_OP_DP -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3 -				| R500_ALU_RGBA_ADDRD(dest); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_DP3, R500_ALPHA_OP_DP, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);  			break;  		case OPCODE_DP4: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			/* Based on DP3 */ -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst4 |= R500_ALPHA_OP_DP -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 -				| R500_ALU_RGBA_ADDRD(dest); -			break; -		case OPCODE_DPH: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			/* Based on DP3 */ -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst4 |= R500_ALPHA_OP_DP -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 -				| R500_ALU_RGBA_ADDRD(dest); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_DP4, R500_ALPHA_OP_DP, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);  			break;  		case OPCODE_DST: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]);  			/* [1, src0.y*src1.y, src0.z, src1.w] -				* So basically MUL with lotsa swizzling. */ -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| R500_ALU_RGB_SEL_B_SRC1; -			/* Select [1, y, z, 1] */ -			temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE; -			code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz); -			/* Select [1, y, 1, w] */ -			temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6); -			code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz); -			code->inst[counter].inst4 |= R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(dest) -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -				| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); +			 * So basically MUL with lotsa swizzling. */ +			ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_argA(cs, ip, 0, +				(make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE, +				R500_SWIZZLE_ONE); +			set_argB(cs, ip, 1, +				(make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6), +				make_alpha_swizzle(fpi->SrcReg[1])); +			set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);  			break;  		case OPCODE_EX2:  			src[0] = make_src(cs, fpi->SrcReg[0]); -			emit_sop(cs, counter, fpi, OPCODE_EX2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); +			emit_sop(cs, R500_ALPHA_OP_EX2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));  			break;  		case OPCODE_FLR: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst4 |= R500_ALPHA_OP_FRC -				| R500_ALPHA_ADDRD(get_temp(cs, 0)) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 0)); -			counter++; -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(get_temp(cs, 0)); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(get_temp(cs, 0)); -			code->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) -				| R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SWIZ_A_A -				| R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(dest) -				| R500_ALU_RGBA_SEL_C_SRC1 -				| MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGBA_ALPHA_SEL_C_SRC1 -				| MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGBA_MOD_C_NEG; +			dest = get_temp(cs, 0); +			ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, dest, WRITEMASK_XYZW); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); + +			ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1_direct(cs, ip, dest); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[1]); +			set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ONE); +			set_argC(cs, ip, 1, +				R500_SWIZ_RGB_RGB|(R500_SWIZ_MOD_NEG<<9), +				SWIZZLE_W|(R500_SWIZ_MOD_NEG<<3));  			break;  		case OPCODE_FRC: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst4 |= R500_ALPHA_OP_FRC -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC -				| R500_ALU_RGBA_ADDRD(dest); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);  			break;  		case OPCODE_LG2:  			src[0] = make_src(cs, fpi->SrcReg[0]); -			emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); +			emit_sop(cs, R500_ALPHA_OP_LN2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));  			break;  		case OPCODE_LIT: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = emit_const4fv(cs, LIT); -			/* First inst: MAX temp, input, [0, 0, 0, -128] -				* Write: RG, A  */ -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT -				| (R500_WRITEMASK_ARG << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) -				| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAX -				| R500_ALPHA_ADDRD(get_temp(cs, 0)) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 0)); -			counter++; -			/* Second inst: MIN temp, temp, [x, x, x, 128] -				* Write: A */ -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)) | R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)) | R500_ALPHA_ADDR1(src[1]); -			/* code->inst[counter].inst3; */ -			code->inst[counter].inst4 = R500_ALPHA_OP_MAX -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A -				| R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX -				| R500_ALU_RGBA_ADDRD(dest); -			counter++; -			/* Third-fifth insts: POW temp, temp.y, temp.w -				* Write: B */ -			emit_sop(cs, counter, fpi, OPCODE_LG2, get_temp(cs, 0), SWIZZLE_Y, get_temp(cs, 1)); -			code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); -			counter++; -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 1)) -				| R500_RGB_ADDR1(get_temp(cs, 0)); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 1)) -				| R500_ALPHA_ADDR1(get_temp(cs, 0)); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(get_temp(cs, 1)) -				| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A -				| R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 1)) -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -				| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -			counter++; -			emit_sop(cs, counter, fpi, OPCODE_EX2, get_temp(cs, 1), SWIZZLE_W, get_temp(cs, 0)); -			code->inst[counter].inst0 |= (R500_WRITEMASK_B << 11); -			counter++; -			/* Sixth inst: CMP dest, temp.xxxx, temp.[1, x, z, 1], temp.[1, x, 0, 1]; -				* Write: ARGB -				* This inst's selects need to be swapped as follows: -				* 0 -> C ; 1 -> B ; 2 -> A */ -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| R500_ALU_RGB_R_SWIZ_A_1 -				| R500_ALU_RGB_G_SWIZ_A_R -				| R500_ALU_RGB_B_SWIZ_A_B -				| R500_ALU_RGB_SEL_B_SRC0 -				| R500_ALU_RGB_R_SWIZ_B_1 -				| R500_ALU_RGB_G_SWIZ_B_R -				| R500_ALU_RGB_B_SWIZ_B_0; -			code->inst[counter].inst4 |= R500_ALPHA_OP_CMP -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1 -				| R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP -				| R500_ALU_RGBA_ADDRD(dest) -				| R500_ALU_RGBA_SEL_C_SRC0 -				| R500_ALU_RGBA_ALPHA_SEL_C_SRC0 -				| R500_ALU_RGBA_R_SWIZ_R -				| R500_ALU_RGBA_G_SWIZ_R -				| R500_ALU_RGBA_B_SWIZ_R -				| R500_ALU_RGBA_A_SWIZ_R; +			emit_lit(cs, fpi);  			break;  		case OPCODE_LRP: -			/* src0 * src1 + INV(src0) * src2 -				* 1) MUL src0, src1, temp -				* 2) PRE 1-src0; MAD srcp, src2, temp */ -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			src[2] = make_src(cs, fpi->SrcReg[2]); -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT -				| R500_INST_NOP | (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(get_temp(cs, 0)) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -				| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -			counter++; -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[2]) -				| R500_RGB_ADDR2(get_temp(cs, 0)) -				| R500_RGB_SRCP_OP_1_MINUS_RGB0; -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[2]) -				| R500_ALPHA_ADDR2(get_temp(cs, 0)) -				| R500_ALPHA_SRCP_OP_1_MINUS_A0; -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[2])); -			code->inst[counter].inst4 |= R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[2])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(dest) -				| R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) -				| R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | R500_ALU_RGBA_A_SWIZ_A; +			/* result = src0*src1 + (1-src0)*src2 +			 *        = src0*src1 + src2 + (-src0)*src2 +			 * +			 * Note: LRP without swizzling (or with only limited +			 * swizzling) could be done more efficiently using the +			 * presubtract hardware. +			 */ +			dest = get_temp(cs, 0); +			ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, dest, WRITEMASK_XYZW); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_src2(cs, ip, fpi->SrcReg[2]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); +			set_argC_reg(cs, ip, 2, fpi->SrcReg[2]); + +			ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[2]); +			set_src2_direct(cs, ip, dest); +			set_argA(cs, ip, 0, +				make_rgb_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<9), +				make_alpha_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<3)); +			set_argB_reg(cs, ip, 1, fpi->SrcReg[2]); +			set_argC(cs, ip, 2, R500_SWIZ_RGB_RGB, SWIZZLE_W);  			break;  		case OPCODE_MAD: -			emit_mad(cs, counter, fpi, 0, 1, 2); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_src2(cs, ip, fpi->SrcReg[2]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); +			set_argC_reg(cs, ip, 2, fpi->SrcReg[2]);  			break;  		case OPCODE_MAX: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGB_SEL_B_SRC1 -				| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst4 |= R500_ALPHA_OP_MAX -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX -				| R500_ALU_RGBA_ADDRD(dest); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_MAX, R500_ALPHA_OP_MAX, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);  			break;  		case OPCODE_MIN: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGB_SEL_B_SRC1 -				| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst4 |= R500_ALPHA_OP_MIN -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN -				| R500_ALU_RGBA_ADDRD(dest); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_MIN, R500_ALPHA_OP_MIN, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);  			break;  		case OPCODE_MOV: -			emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argB_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); +			code->inst[ip].inst3 |= R500_ALU_RGB_OMOD_DISABLE; +			code->inst[ip].inst4 |= R500_ALPHA_OMOD_DISABLE;  			break;  		case OPCODE_MUL:  			/* Variation on MAD: src0*src1+0 */ -			emit_mad(cs, counter, fpi, 0, 1, R500_SWIZZLE_ZERO); -			break; -		case OPCODE_POW: -			/* POW(a,b) = EX2(LN2(a)*b) */ -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), get_temp(cs, 0)); -			code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); -			counter++; -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)) -				| R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(get_temp(cs, 1)) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 1)) -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -				| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -			counter++; -			emit_sop(cs, counter, fpi, OPCODE_EX2, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); +			ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); +			set_src0(cs, ip, fpi->SrcReg[0]); +			set_src1(cs, ip, fpi->SrcReg[1]); +			set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); +			set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); +			set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);  			break;  		case OPCODE_RCP:  			src[0] = make_src(cs, fpi->SrcReg[0]); -			emit_sop(cs, counter, fpi, OPCODE_RCP, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); +			emit_sop(cs, R500_ALPHA_OP_RCP, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));  			break;  		case OPCODE_RSQ:  			src[0] = make_src(cs, fpi->SrcReg[0]); -			emit_sop(cs, counter, fpi, OPCODE_RSQ, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); +			emit_sop(cs, R500_ALPHA_OP_RSQ, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));  			break;  		case OPCODE_SCS: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = emit_const4fv(cs, RCP_2PI); -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT -				| (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(get_temp(cs, 0)) -				| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A -				| R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -				| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -			counter++; -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); -			code->inst[counter].inst4 = R500_ALPHA_OP_FRC -				| R500_ALPHA_ADDRD(get_temp(cs, 1)) -				| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 1)); -			counter++; -			/* Do a cosine, then a sine, masking out the channels we want to protect. */ -			/* Cosine only goes in R (x) channel. */ -			fpi->DstReg.WriteMask = 0x1; -			emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); -			counter++; -			/* Sine only goes in G (y) channel. */ -			fpi->DstReg.WriteMask = 0x2; -			emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); -			break; -		case OPCODE_SGE: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT -				| (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) -				| R500_RGB_ADDR2(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) -				| R500_ALPHA_ADDR2(src[1]); -			code->inst[counter].inst3 = /* 1 */ -				MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(get_temp(cs, 0)) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) -				| R500_ALU_RGBA_SEL_C_SRC2 -				| MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) -				| R500_ALU_RGBA_MOD_C_NEG -				| R500_ALU_RGBA_ALPHA_SEL_C_SRC2 -				| MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) -				| R500_ALU_RGBA_ALPHA_MOD_C_NEG; -			counter++; -			/* This inst's selects need to be swapped as follows: -				* 0 -> C ; 1 -> B ; 2 -> A */ -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) -				| R500_ALU_RGB_SEL_B_SRC0 -				| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); -			code->inst[counter].inst4 |= R500_ALPHA_OP_CMP -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) -				| R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP -				| R500_ALU_RGBA_ADDRD(dest) -				| R500_ALU_RGBA_SEL_C_SRC0 -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) -				| R500_ALU_RGBA_ALPHA_SEL_C_SRC0 -				| R500_ALU_RGBA_A_SWIZ_A; +			emit_trig(cs, fpi);  			break;  		case OPCODE_SIN: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = emit_const4fv(cs, RCP_2PI); -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT -				| (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(get_temp(cs, 0)) -				| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A -				| R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -				| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -			counter++; -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); -			code->inst[counter].inst4 = R500_ALPHA_OP_FRC -				| R500_ALPHA_ADDRD(get_temp(cs, 1)) -				| R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 1)); -			counter++; -			emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); -			break; -		case OPCODE_SLT: -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT -				| (R500_WRITEMASK_ARGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) -				| R500_RGB_ADDR2(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) -				| R500_ALPHA_ADDR2(src[1]); -			code->inst[counter].inst3 = /* 1 */ -				MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) -				| R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(get_temp(cs, 0)) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) -				| R500_ALU_RGBA_SEL_C_SRC2 -				| MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) -				| R500_ALU_RGBA_MOD_C_NEG -				| R500_ALU_RGBA_ALPHA_SEL_C_SRC2 -				| MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) -				| R500_ALU_RGBA_ALPHA_MOD_C_NEG; -			counter++; -			/* This inst's selects need to be swapped as follows: -				* 0 -> C ; 1 -> B ; 2 -> A */ -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) -				| R500_ALU_RGB_SEL_B_SRC0 -				| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); -			code->inst[counter].inst4 |= R500_ALPHA_OP_CMP -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) -				| R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP -				| R500_ALU_RGBA_ADDRD(dest) -				| R500_ALU_RGBA_SEL_C_SRC0 -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) -				| R500_ALU_RGBA_ALPHA_SEL_C_SRC0 -				| R500_ALU_RGBA_A_SWIZ_A; -			break; -		case OPCODE_SUB: -			/* Variation on MAD: 1*src0-src1 */ -			fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */ -			emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1); -			break; -		case OPCODE_SWZ: -			/* TODO: The rarer negation masks! */ -			emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); -			break; -		case OPCODE_XPD: -			/* src0 * src1 - src1 * src0 -				* 1) MUL temp.xyz, src0.yzx, src1.zxy -				* 2) MAD src0.zxy, src1.yzx, -temp.xyz */ -			src[0] = make_src(cs, fpi->SrcReg[0]); -			src[1] = make_src(cs, fpi->SrcReg[1]); -			code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT -				| (R500_WRITEMASK_RGB << 11); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]); -			/* Select [y, z, x] */ -			temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); -			temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(temp_swiz); -			/* Select [z, x, y] */ -			temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); -			temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); -			code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 -				| MAKE_SWIZ_RGB_B(temp_swiz); -			code->inst[counter].inst4 = R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(get_temp(cs, 0)) -				| R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) -				| R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) -				| MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -			counter++; -			emit_alu(cs, counter, fpi); -			code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -				| R500_RGB_ADDR1(src[1]) -				| R500_RGB_ADDR2(get_temp(cs, 0)); -			code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) -				| R500_ALPHA_ADDR1(src[1]) -				| R500_ALPHA_ADDR2(get_temp(cs, 0)); -			/* Select [z, x, y] */ -			temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); -			temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); -			code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -				| MAKE_SWIZ_RGB_A(temp_swiz); -			/* Select [y, z, x] */ -			temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); -			temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); -			code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 -				| MAKE_SWIZ_RGB_B(temp_swiz); -			code->inst[counter].inst4 |= R500_ALPHA_OP_MAD -				| R500_ALPHA_ADDRD(dest) -				| R500_ALPHA_SWIZ_A_1 -				| R500_ALPHA_SWIZ_B_1; -			code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD -				| R500_ALU_RGBA_ADDRD(dest) -				| R500_ALU_RGBA_SEL_C_SRC2 -				| MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) -				| R500_ALU_RGBA_MOD_C_NEG -				| R500_ALU_RGBA_A_SWIZ_0; +			emit_trig(cs, fpi);  			break;  		case OPCODE_KIL:  		case OPCODE_TEX:  		case OPCODE_TXB:  		case OPCODE_TXP: -			emit_tex(cs, fpi, dest, counter); -				if (fpi->DstReg.File == PROGRAM_OUTPUT) -					counter++; +			emit_tex(cs, fpi, dest);  			break;  		default:  			ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode)); @@ -1245,37 +928,30 @@ static int do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *f  	/* Finishing touches */  	if (fpi->SaturateMode == SATURATE_ZERO_ONE) { -		code->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; +		code->inst[cs->nrslots-1].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;  	} - -	counter++; - -	return counter;  }  static GLboolean parse_program(struct r500_pfs_compile_state *cs)  {  	PROG_CODE; -	int counter = 0;  	struct prog_instruction* fpi;  	for(fpi = cs->compiler->program->Instructions; fpi->Opcode != OPCODE_END; ++fpi) { -		counter = do_inst(cs, fpi, counter); +		do_inst(cs, fpi);  		if (cs->compiler->fp->error)  			return GL_FALSE;  	}  	/* Finish him! (If it's an ALU/OUT instruction...) */ -	if ((code->inst[counter-1].inst0 & 0x3) == 1) { -		code->inst[counter-1].inst0 |= R500_INST_LAST; +	if ((code->inst[cs->nrslots-1].inst0 & 0x3) == 1) { +		code->inst[cs->nrslots-1].inst0 |= R500_INST_LAST;  	} else {  		/* We still need to put an output inst, right? */  		WARN_ONCE("Final FP instruction is not an OUT.\n");  	} -	cs->nrslots = counter; -  	code->max_temp_idx++;  	return GL_TRUE; @@ -1295,12 +971,10 @@ static void init_program(struct r500_pfs_compile_state *cs)  	    driQueryOptioni(&cs->compiler->r300->radeon.optionCache, "fp_optimization");  	cs->compiler->fp->translated = GL_FALSE;  	cs->compiler->fp->error = GL_FALSE; -	code->const_nr = 0; -	/* Size of pixel stack, plus 1. */ -	code->max_temp_idx = 1; -	/* Temp register offset. */ -	code->temp_reg_offset = 0; -	/* Whether or not we perform any depth writing. */ + +	_mesa_bzero(code, sizeof(*code)); +	code->max_temp_idx = 1; /* Size of pixel stack, plus 1. */ +	cs->nrslots = 0;  	cs->compiler->fp->writes_depth = GL_FALSE;  	/* Work out what temps the Mesa inputs correspond to, this must match  | 
