diff options
| author | Brian Paul <brian.paul@tungstengraphics.com> | 2002-03-07 21:40:08 +0000 | 
|---|---|---|
| committer | Brian Paul <brian.paul@tungstengraphics.com> | 2002-03-07 21:40:08 +0000 | 
| commit | 0b2fa873c6da02f169a76d74d3bedd91236f8e28 (patch) | |
| tree | 505c779a78d475869f837a371ea65979ec3a0a6b /src | |
| parent | c5a76cbed41b5816d2f5284f90c70364b062aea4 (diff) | |
syntax fixes for Solaris (David Dawes)
Diffstat (limited to 'src')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/mesa/x86/assyntax.h | 13 |
| -rw-r--r-- | src/mesa/x86/common_x86_asm.S | 36 |
| -rw-r--r-- | src/mesa/x86/x86_cliptest.S | 119 |
| -rw-r--r-- | src/mesa/x86/x86_xform2.S | 299 |
| -rw-r--r-- | src/mesa/x86/x86_xform3.S | 391 |
| -rw-r--r-- | src/mesa/x86/x86_xform4.S | 451 |
6 files changed, 711 insertions, 598 deletions
| diff --git a/src/mesa/x86/assyntax.h b/src/mesa/x86/assyntax.h index 442d21b2a7..8acc6d031d 100644 --- a/src/mesa/x86/assyntax.h +++ b/src/mesa/x86/assyntax.h @@ -1,4 +1,4 @@ -/* $Id: assyntax.h,v 1.17 2002/01/04 14:35:46 brianp Exp $ */ +/* $Id: assyntax.h,v 1.18 2002/03/07 21:40:08 brianp Exp $ */  #ifndef __ASSYNTAX_H__  #define __ASSYNTAX_H__ @@ -69,6 +69,11 @@  #if !(defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER)) +/* Default to ATT_ASSEMBLER when SVR4 or SYSV are defined */ +#if (defined(SVR4) || defined(SYSV)) && !defined(GNU_ASSEMBLER) +#define ATT_ASSEMBLER +#endif +  #if !defined(ATT_ASSEMBLER) && !defined(GNU_ASSEMBLER) && !defined(ACK_ASSEMBLER)  #define GNU_ASSEMBLER  #endif @@ -200,6 +205,11 @@  #define _STX6		%st(6)  #define _STX7		%st(7)  #define ST(x)		CONCAT(_STX,x) +#ifdef GNU_ASSEMBLER +#define ST0		%st(0) +#else +#define ST0		%st +#endif  /* MMX Registers */  #define MM0		%mm0  #define MM1		%mm1 @@ -858,6 +868,7 @@  #if defined(NASM_ASSEMBLER)  #define ST(n)		st ## n +#define ST0		st0  #define TBYTE_PTR	tword  #define QWORD_PTR	qword diff --git a/src/mesa/x86/common_x86_asm.S b/src/mesa/x86/common_x86_asm.S index 8a0a6477af..61ff15a1df 100644 --- a/src/mesa/x86/common_x86_asm.S +++ b/src/mesa/x86/common_x86_asm.S @@ -1,4 +1,4 @@ -/* $Id: common_x86_asm.S,v 1.7 2001/03/29 06:46:16 gareth Exp $ */ +/* $Id: common_x86_asm.S,v 1.8 2002/03/07 21:40:08 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -34,6 +34,12 @@   * Cleaned up and simplified by Gareth Hughes <gareth@valinux.com>   */ +/* + * NOTE: Avoid using spaces in between '(' ')' and arguments, especially + * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces + * in there will break the build on some platforms. + */ +  #include "matypes.h"  #include "common_x86_features.h" @@ -87,7 +93,7 @@ GLNAME( _mesa_identify_x86_cpu_features ):  	/* Verify the ID Flag bit has been written.  	 
*/  	CMP_L	( ECX, EAX ) -	JZ	( LLBL ( cpuid_done ) ) +	JZ	( LLBL (cpuid_done) )  	/* Get the CPU vendor info.  	 */ @@ -98,11 +104,11 @@ GLNAME( _mesa_identify_x86_cpu_features ):  	 * "GenuineIntel" string in EBX, ECX and EDX.  	 */  	CMP_L	( CONST(GENU), EBX ) -	JNE	( LLBL( cpuid_amd ) ) +	JNE	( LLBL(cpuid_amd) )  	CMP_L	( CONST(INEI), EDX ) -	JNE	( LLBL( cpuid_amd ) ) +	JNE	( LLBL(cpuid_amd) )  	CMP_L	( CONST(NTEL), ECX ) -	JNE	( LLBL( cpuid_amd ) ) +	JNE	( LLBL(cpuid_amd) )  	/* We have an Intel processor, so we can get the feature  	 * information with an CPUID input value of 1. @@ -110,19 +116,19 @@ GLNAME( _mesa_identify_x86_cpu_features ):  	MOV_L	( CONST(0x1), EAX )  	CPUID  	MOV_L	( EDX, EAX ) -	JMP	( LLBL( cpuid_done ) ) +	JMP	( LLBL(cpuid_done) ) -LLBL( cpuid_amd ): +LLBL(cpuid_amd):  	/* Test for AMD processors.  We must look for the  	 * "AuthenticAMD" string in EBX, ECX and EDX.  	 */  	CMP_L	( CONST(AUTH), EBX ) -	JNE	( LLBL( cpuid_other ) ) +	JNE	( LLBL(cpuid_other) )  	CMP_L	( CONST(ENTI), EDX ) -	JNE	( LLBL( cpuid_other ) ) +	JNE	( LLBL(cpuid_other) )  	CMP_L	( CONST(CAMD), ECX ) -	JNE	( LLBL( cpuid_other ) ) +	JNE	( LLBL(cpuid_other) )  	/* We have an AMD processor, so we can get the feature  	 * information after we verify that the extended functions are @@ -131,19 +137,19 @@ LLBL( cpuid_amd ):  	MOV_L	( CONST(0x80000000), EAX )  	CPUID  	TEST_L	( EAX, EAX ) -	JZ	( LLBL ( cpuid_failed ) ) +	JZ	( LLBL (cpuid_failed) )  	MOV_L	( CONST(0x80000001), EAX )  	CPUID  	MOV_L	( EDX, EAX ) -	JMP	( LLBL ( cpuid_done ) ) +	JMP	( LLBL (cpuid_done) ) -LLBL( cpuid_other ): +LLBL(cpuid_other):  	/* Test for other processors here when required.  	 
*/ -LLBL( cpuid_failed ): +LLBL(cpuid_failed):  	/* If we can't determine the feature information, we must  	 * return zero to indicate that no platform-specific @@ -151,7 +157,7 @@ LLBL( cpuid_failed ):  	 */  	MOV_L	( CONST(0), EAX ) -LLBL ( cpuid_done ): +LLBL (cpuid_done):  	POP_L	( EBX )  	RET diff --git a/src/mesa/x86/x86_cliptest.S b/src/mesa/x86/x86_cliptest.S index 08e34ecbd6..617eb2b6fe 100644 --- a/src/mesa/x86/x86_cliptest.S +++ b/src/mesa/x86/x86_cliptest.S @@ -1,4 +1,4 @@ -/* $Id: x86_cliptest.S,v 1.8 2001/05/21 16:33:41 gareth Exp $ */ +/* $Id: x86_cliptest.S,v 1.9 2002/03/07 21:40:08 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -24,23 +24,38 @@   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.   */ +/* + * NOTE: Avoid using spaces in between '(' ')' and arguments, especially + * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces + * in there will break the build on some platforms. + */ +  #include "matypes.h"  #include "clip_args.h" -#define SRC(i)		REGOFF(i * 4, ESI) -#define DST(i)		REGOFF(i * 4, EDI) -#define MAT(i)		REGOFF(i * 4, EDX) +#define SRC0		REGOFF(0, ESI) +#define SRC1		REGOFF(4, ESI) +#define SRC2		REGOFF(8, ESI) +#define SRC3		REGOFF(12, ESI) +#define DST0		REGOFF(0, EDI) +#define DST1		REGOFF(4, EDI) +#define DST2		REGOFF(8, EDI) +#define DST3		REGOFF(12, EDI) +#define MAT0		REGOFF(0, EDX) +#define MAT1		REGOFF(4, EDX) +#define MAT2		REGOFF(8, EDX) +#define MAT3		REGOFF(12, EDX)  /*   * Table for clip test.   
* - * 	bit6 = SRC(3) < 0 - * 	bit5 = SRC(2) < 0 + * 	bit6 = SRC3 < 0 + * 	bit5 = SRC2 < 0   * 	bit4 = abs(S(2)) > abs(S(3)) - * 	bit3 = SRC(1) < 0 + * 	bit3 = SRC1 < 0   * 	bit2 = abs(S(1)) > abs(S(3)) - * 	bit1 = SRC(0) < 0 + * 	bit1 = SRC0 < 0   * 	bit0 = abs(S(0)) > abs(S(3))   */ @@ -100,18 +115,18 @@ GLNAME( _mesa_x86_cliptest_points4 ):  #ifdef ELFPIC  	/* store pointer to clip_table on stack */ -	CALL( LLBL( ctp4_get_eip ) ) +	CALL( LLBL(ctp4_get_eip) )  	ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )  	MOV_L( REGOFF(clip_table@GOT, EBX), EBX )  	PUSH_L( EBX ) -	JMP( LLBL( ctp4_clip_table_ready ) ) +	JMP( LLBL(ctp4_clip_table_ready) ) -LLBL( ctp4_get_eip ): +LLBL(ctp4_get_eip):  	/* store eip in ebx */  	MOV_L( REGIND(ESP), EBX )  	RET -LLBL( ctp4_clip_table_ready ): +LLBL(ctp4_clip_table_ready):  #endif  	MOV_L( ARG_SOURCE, ESI ) @@ -141,16 +156,16 @@ LLBL( ctp4_clip_table_ready ):  	MOV_B( REGIND(EBX), AL )  	MOV_B( REGIND(EBP), AH ) -	JZ( LLBL( ctp4_finish ) ) +	JZ( LLBL(ctp4_finish) )  ALIGNTEXT16 -LLBL( ctp4_top ): +LLBL(ctp4_top):  	FLD1				/* F3 */ -	FDIV_S( SRC(3) )		/* GH: don't care about div-by-zero */ +	FDIV_S( SRC3 )		/* GH: don't care about div-by-zero */ -	MOV_L( SRC(3), EBP ) -	MOV_L( SRC(2), EBX ) +	MOV_L( SRC3, EBP ) +	MOV_L( SRC2, EBX )  	XOR_L( ECX, ECX )  	ADD_L( EBP, EBP )	/* ebp = abs(S(3))*2 ; carry = sign of S(3) */ @@ -162,7 +177,7 @@ LLBL( ctp4_top ):  	CMP_L( EBX, EBP )	/* carry = abs(S(2))*2 > abs(S(3))*2 */  	ADC_L( ECX, ECX ) -	MOV_L( SRC(1), EBX ) +	MOV_L( SRC1, EBX )  	ADD_L( EBX, EBX )	/* ebx = abs(S(1))*2 ; carry = sign of S(1) */ @@ -170,7 +185,7 @@ LLBL( ctp4_top ):  	CMP_L( EBX, EBP )	/* carry = abs(S(1))*2 > abs(S(3))*2 */  	ADC_L( ECX, ECX ) -	MOV_L( SRC(0), EBX ) +	MOV_L( SRC0, EBX )  	ADD_L( EBX, EBX )	/* ebx = abs(S(0))*2 ; carry = sign of S(0) */ @@ -193,37 +208,37 @@ LLBL( ctp4_top ):  	TEST_B( CL, CL )  	MOV_B( CL, REGIND(EDX) ) -	JZ( LLBL( ctp4_proj ) ) +	JZ( LLBL(ctp4_proj) ) -LLBL( ctp4_noproj ): 
+LLBL(ctp4_noproj):  	FSTP( ST(0) )			/* */ -	MOV_L( CONST(0), DST(0) ) -	MOV_L( CONST(0), DST(1) ) -	MOV_L( CONST(0), DST(2) ) -	MOV_L( CONST(0x3f800000), DST(3) ) +	MOV_L( CONST(0), DST0 ) +	MOV_L( CONST(0), DST1 ) +	MOV_L( CONST(0), DST2 ) +	MOV_L( CONST(0x3f800000), DST3 ) -	JMP( LLBL( ctp4_next ) ) +	JMP( LLBL(ctp4_next) ) -LLBL( ctp4_proj ): +LLBL(ctp4_proj): -	FLD_S( SRC(0) )			/* F0 F3 */ -	FMUL2( ST(1), ST(0) ) +	FLD_S( SRC0 )			/* F0 F3 */ +	FMUL2( ST(1), ST0 ) -	FLD_S( SRC(1) )			/* F1 F0 F3 */ -	FMUL2( ST(2), ST(0) ) +	FLD_S( SRC1 )			/* F1 F0 F3 */ +	FMUL2( ST(2), ST0 ) -	FLD_S( SRC(2) )			/* F2 F1 F0 F3 */ -	FMUL2( ST(3), ST(0) ) +	FLD_S( SRC2 )			/* F2 F1 F0 F3 */ +	FMUL2( ST(3), ST0 )  	FXCH( ST(2) )			/* F0 F1 F2 F3 */ -	FSTP_S( DST(0) )		/* F1 F2 F3 */ -	FSTP_S( DST(1) )		/* F2 F3 */ -	FSTP_S( DST(2) )		/* F3 */ -	FSTP_S( DST(3) )		/* */ +	FSTP_S( DST0 )		/* F1 F2 F3 */ +	FSTP_S( DST1 )		/* F2 F3 */ +	FSTP_S( DST2 )		/* F3 */ +	FSTP_S( DST3 )		/* */ -LLBL( ctp4_next ): +LLBL(ctp4_next):  	INC_L( EDX )  	ADD_L( CONST(16), EDI ) @@ -231,7 +246,7 @@ LLBL( ctp4_next ):  	ADD_L( ARG_SOURCE, ESI )  	CMP_L( EDX, ARG_CLIP ) -	JNZ( LLBL( ctp4_top ) ) +	JNZ( LLBL(ctp4_top) )  	MOV_L( ARG_OR, ECX )  	MOV_L( ARG_AND, EDX ) @@ -239,7 +254,7 @@ LLBL( ctp4_next ):  	MOV_B( AL, REGIND(ECX) )  	MOV_B( AH, REGIND(EDX) ) -LLBL( ctp4_finish ): +LLBL(ctp4_finish):  	MOV_L( ARG_DEST, EAX )  #ifdef ELFPIC @@ -274,18 +289,18 @@ GLNAME( _mesa_x86_cliptest_points4_np ):  #ifdef ELFPIC  	/* store pointer to clip_table on stack */ -	CALL( LLBL( ctp4_np_get_eip ) ) +	CALL( LLBL(ctp4_np_get_eip) )  	ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )  	MOV_L( REGOFF(clip_table@GOT, EBX), EBX )  	PUSH_L( EBX ) -	JMP( LLBL( ctp4_np_clip_table_ready ) ) +	JMP( LLBL(ctp4_np_clip_table_ready) ) -LLBL( ctp4_np_get_eip ): +LLBL(ctp4_np_get_eip):  	/* store eip in ebx */  	MOV_L( REGIND(ESP), EBX )  	RET -LLBL( ctp4_np_clip_table_ready ): +LLBL(ctp4_np_clip_table_ready):  #endif  	MOV_L( 
ARG_SOURCE, ESI ) @@ -309,13 +324,13 @@ LLBL( ctp4_np_clip_table_ready ):  	MOV_B( REGIND(EBX), AL )  	MOV_B( REGIND(EBP), AH ) -	JZ( LLBL( ctp4_np_finish ) ) +	JZ( LLBL(ctp4_np_finish) )  ALIGNTEXT16 -LLBL( ctp4_np_top ): +LLBL(ctp4_np_top): -	MOV_L( SRC(3), EBP ) -	MOV_L( SRC(2), EBX ) +	MOV_L( SRC3, EBP ) +	MOV_L( SRC2, EBX )  	XOR_L( ECX, ECX )  	ADD_L( EBP, EBP )	/* ebp = abs(S(3))*2 ; carry = sign of S(3) */ @@ -327,7 +342,7 @@ LLBL( ctp4_np_top ):  	CMP_L( EBX, EBP )	/* carry = abs(S(2))*2 > abs(S(3))*2 */  	ADC_L( ECX, ECX ) -	MOV_L( SRC(1), EBX ) +	MOV_L( SRC1, EBX )  	ADD_L( EBX, EBX )	/* ebx = abs(S(1))*2 ; carry = sign of S(1) */ @@ -335,7 +350,7 @@ LLBL( ctp4_np_top ):  	CMP_L( EBX, EBP )	/* carry = abs(S(1))*2 > abs(S(3))*2 */  	ADC_L( ECX, ECX ) -	MOV_L( SRC(0), EBX ) +	MOV_L( SRC0, EBX )  	ADD_L( EBX, EBX )	/* ebx = abs(S(0))*2 ; carry = sign of S(0) */ @@ -364,7 +379,7 @@ LLBL( ctp4_np_top ):  	ADD_L( ARG_DEST, ESI )  	CMP_L( EDX, EDI ) -	JNZ( LLBL( ctp4_np_top ) ) +	JNZ( LLBL(ctp4_np_top) )  	MOV_L( ARG_OR, ECX )  	MOV_L( ARG_AND, EDX ) @@ -372,7 +387,7 @@ LLBL( ctp4_np_top ):  	MOV_B( AL, REGIND(ECX) )  	MOV_B( AH, REGIND(EDX) ) -LLBL( ctp4_np_finish ): +LLBL(ctp4_np_finish):  	MOV_L( ARG_SOURCE, EAX )  #ifdef ELFPIC diff --git a/src/mesa/x86/x86_xform2.S b/src/mesa/x86/x86_xform2.S index 0e46a8411e..b9528e1653 100644 --- a/src/mesa/x86/x86_xform2.S +++ b/src/mesa/x86/x86_xform2.S @@ -1,4 +1,4 @@ -/* $Id: x86_xform2.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */ +/* $Id: x86_xform2.S,v 1.2 2002/03/07 21:40:08 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -24,6 +24,12 @@   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.   */ +/* + * NOTE: Avoid using spaces in between '(' ')' and arguments, especially + * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces + * in there will break the build on some platforms. 
+ */ +  #include "matypes.h"  #include "xform_args.h" @@ -32,9 +38,30 @@  #define FP_ONE		1065353216  #define FP_ZERO		0 -#define SRC(i)		REGOFF(i * 4, ESI) -#define DST(i)		REGOFF(i * 4, EDI) -#define MAT(i)		REGOFF(i * 4, EDX) +#define SRC0		REGOFF(0, ESI) +#define SRC1		REGOFF(4, ESI) +#define SRC2		REGOFF(8, ESI) +#define SRC3		REGOFF(12, ESI) +#define DST0		REGOFF(0, EDI) +#define DST1		REGOFF(4, EDI) +#define DST2		REGOFF(8, EDI) +#define DST3		REGOFF(12, EDI) +#define MAT0		REGOFF(0, EDX) +#define MAT1		REGOFF(4, EDX) +#define MAT2		REGOFF(8, EDX) +#define MAT3		REGOFF(12, EDX) +#define MAT4		REGOFF(16, EDX) +#define MAT5		REGOFF(20, EDX) +#define MAT6		REGOFF(24, EDX) +#define MAT7		REGOFF(28, EDX) +#define MAT8		REGOFF(32, EDX) +#define MAT9		REGOFF(36, EDX) +#define MAT10		REGOFF(40, EDX) +#define MAT11		REGOFF(44, EDX) +#define MAT12		REGOFF(48, EDX) +#define MAT13		REGOFF(52, EDX) +#define MAT14		REGOFF(56, EDX) +#define MAT15		REGOFF(60, EDX)  ALIGNTEXT16 @@ -52,7 +79,7 @@ GLNAME( _mesa_x86_transform_points2_general ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p2_gr_done ) ) +	JZ( LLBL(x86_p2_gr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -67,57 +94,57 @@ GLNAME( _mesa_x86_transform_points2_general ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p2_gr_loop ): +LLBL(x86_p2_gr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) -	FLD_S( SRC(0) )			/* F5 F4 */ -	FMUL_S( MAT(1) ) -	FLD_S( SRC(0) )			/* F6 F5 F4 */ -	FMUL_S( MAT(2) ) -	FLD_S( SRC(0) )			/* F7 F6 F5 F4 */ -	FMUL_S( MAT(3) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) +	FLD_S( SRC0 )			/* F5 F4 */ +	FMUL_S( MAT1 ) +	FLD_S( SRC0 )			/* F6 F5 F4 */ +	FMUL_S( MAT2 ) +	FLD_S( SRC0 )			/* F7 F6 F5 F4 */ +	FMUL_S( MAT3 ) -	FLD_S( SRC(1) )			/* F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(4) ) -	FLD_S( SRC(1) )			/* F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(5) ) -	FLD_S( SRC(1) )			/* F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(6) 
) -	FLD_S( SRC(1) )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(7) ) +	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */ +	FMUL_S( MAT4 ) +	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT5 ) +	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT6 ) +	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT7 )  	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */  	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F7 F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */  	FXCH( ST(3) )			/* F4 F6 F5 F7 */ -	FADD_S( MAT(12) ) +	FADD_S( MAT12 )  	FXCH( ST(2) )			/* F5 F6 F4 F7 */ -	FADD_S( MAT(13) ) +	FADD_S( MAT13 )  	FXCH( ST(1) )			/* F6 F5 F4 F7 */ -	FADD_S( MAT(14) ) +	FADD_S( MAT14 )  	FXCH( ST(3) )			/* F7 F5 F4 F6 */ -	FADD_S( MAT(15) ) +	FADD_S( MAT15 )  	FXCH( ST(2) )			/* F4 F5 F7 F6 */ -	FSTP_S( DST(0) )		/* F5 F7 F6 */ -	FSTP_S( DST(1) )		/* F7 F6 */ +	FSTP_S( DST0 )			/* F5 F7 F6 */ +	FSTP_S( DST1 )			/* F7 F6 */  	FXCH( ST(1) )			/* F6 F7 */ -	FSTP_S( DST(2) )		/* F7 */ -	FSTP_S( DST(3) )		/* */ +	FSTP_S( DST2 )			/* F7 */ +	FSTP_S( DST3 )			/* */ -LLBL( x86_p2_gr_skip ): +LLBL(x86_p2_gr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p2_gr_loop ) ) +	JNE( LLBL(x86_p2_gr_loop) ) -LLBL( x86_p2_gr_done ): +LLBL(x86_p2_gr_done):  	POP_L( EDI )  	POP_L( ESI ) @@ -143,7 +170,7 @@ GLNAME( _mesa_x86_transform_points2_perspective ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p2_pr_done ) ) +	JZ( LLBL(x86_p2_pr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -157,31 +184,31 @@ GLNAME( _mesa_x86_transform_points2_perspective ):  	MOV_L( 
REGOFF(V4F_START, EDI), EDI )  	ADD_L( EDI, ECX ) -	MOV_L( MAT(14), EBX ) +	MOV_L( MAT14, EBX )  ALIGNTEXT16 -LLBL( x86_p2_pr_loop ): +LLBL(x86_p2_pr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) -	FLD_S( SRC(1) )			/* F1 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F1 F4 */ +	FMUL_S( MAT5 )  	FXCH( ST(1) )			/* F4 F1 */ -	FSTP_S( DST(0)   )		/* F1 */ -	FSTP_S( DST(1)   )		/* */ -	MOV_L( EBX, DST(2) ) -	MOV_L( CONST(FP_ZERO), DST(3) ) +	FSTP_S( DST0   )		/* F1 */ +	FSTP_S( DST1   )		/* */ +	MOV_L( EBX, DST2 ) +	MOV_L( CONST(FP_ZERO), DST3 ) -LLBL( x86_p2_pr_skip ): +LLBL(x86_p2_pr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p2_pr_loop ) ) +	JNE( LLBL(x86_p2_pr_loop) ) -LLBL( x86_p2_pr_done ): +LLBL(x86_p2_pr_done):  	POP_L( EBX )  	POP_L( EDI ) @@ -207,7 +234,7 @@ GLNAME( _mesa_x86_transform_points2_3d ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p2_3dr_done ) ) +	JZ( LLBL(x86_p2_3dr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) @@ -222,48 +249,48 @@ GLNAME( _mesa_x86_transform_points2_3d ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p2_3dr_loop ): +LLBL(x86_p2_3dr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) -	FLD_S( SRC(0) )			/* F5 F4 */ -	FMUL_S( MAT(1) ) -	FLD_S( SRC(0) )			/* F6 F5 F4 */ -	FMUL_S( MAT(2) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) +	FLD_S( SRC0 )			/* F5 F4 */ +	FMUL_S( MAT1 ) +	FLD_S( SRC0 )			/* F6 F5 F4 */ +	FMUL_S( MAT2 ) -	FLD_S( SRC(1) )			/* F0 F6 F5 F4 */ -	FMUL_S( MAT(4) ) -	FLD_S( SRC(1) )			/* F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(5) ) -	FLD_S( SRC(1) )			/* F2 F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(6) ) +	FLD_S( SRC1 )			/* F0 F6 F5 F4 */ +	FMUL_S( MAT4 ) +	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */ +	FMUL_S( MAT5 ) +	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */ +	FMUL_S( MAT6 )  	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F1 F2 F6 
F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F2 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F6 F5 F4 */  	FXCH( ST(2) )			/* F4 F5 F6 */ -	FADD_S( MAT(12) ) +	FADD_S( MAT12 )  	FXCH( ST(1) )			/* F5 F4 F6 */ -	FADD_S( MAT(13) ) +	FADD_S( MAT13 )  	FXCH( ST(2) )			/* F6 F4 F5 */ -	FADD_S( MAT(14) ) +	FADD_S( MAT14 )  	FXCH( ST(1) )			/* F4 F6 F5 */ -	FSTP_S( DST(0) )		/* F6 F5 */ +	FSTP_S( DST0 )			/* F6 F5 */  	FXCH( ST(1) )			/* F5 F6 */ -	FSTP_S( DST(1) )		/* F6 */ -	FSTP_S( DST(2) )		/* */ +	FSTP_S( DST1 )			/* F6 */ +	FSTP_S( DST2 )			/* */ -LLBL( x86_p2_3dr_skip ): +LLBL(x86_p2_3dr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p2_3dr_loop ) ) +	JNE( LLBL(x86_p2_3dr_loop) ) -LLBL( x86_p2_3dr_done ): +LLBL(x86_p2_3dr_done):  	POP_L( EDI )  	POP_L( ESI ) @@ -289,7 +316,7 @@ GLNAME( _mesa_x86_transform_points2_3d_no_rot ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p2_3dnrr_done ) ) +	JZ( LLBL(x86_p2_3dnrr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) @@ -303,35 +330,35 @@ GLNAME( _mesa_x86_transform_points2_3d_no_rot ):  	MOV_L( REGOFF(V4F_START, EDI), EDI )  	ADD_L( EDI, ECX ) -	MOV_L( MAT(14), EBX ) +	MOV_L( MAT14, EBX )  ALIGNTEXT16 -LLBL( x86_p2_3dnrr_loop ): +LLBL(x86_p2_3dnrr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) -	FLD_S( SRC(1) )			/* F1 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F1 F4 */ +	FMUL_S( MAT5 )  	FXCH( ST(1) )			/* F4 F1 */ -	FADD_S( MAT(12) ) -	FLD_S( MAT(13) )		/* F5 F4 F1 */ +	FADD_S( MAT12 ) +	FLD_S( MAT13 )		/* F5 F4 F1 */  	FXCH( ST(2) )			/* F1 F4 F5 */ -	FADDP( ST(0), ST(2) )		/* F4 F5 */ +	FADDP( ST0, ST(2) )		/* F4 F5 */ -	FSTP_S( DST(0) )		/* F5 */ -	FSTP_S( DST(1) )		/* */ -	MOV_L( EBX, DST(2) ) +	FSTP_S( DST0 )		/* F5 */ +	FSTP_S( 
DST1 )		/* */ +	MOV_L( EBX, DST2 ) -LLBL( x86_p2_3dnrr_skip ): +LLBL(x86_p2_3dnrr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p2_3dnrr_loop ) ) +	JNE( LLBL(x86_p2_3dnrr_loop) ) -LLBL( x86_p2_3dnrr_done ): +LLBL(x86_p2_3dnrr_done):  	POP_L( EBX )  	POP_L( EDI ) @@ -357,7 +384,7 @@ GLNAME( _mesa_x86_transform_points2_2d ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p2_2dr_done ) ) +	JZ( LLBL(x86_p2_2dr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) @@ -372,39 +399,39 @@ GLNAME( _mesa_x86_transform_points2_2d ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p2_2dr_loop ): +LLBL(x86_p2_2dr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) -	FLD_S( SRC(0) )			/* F5 F4 */ -	FMUL_S( MAT(1) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) +	FLD_S( SRC0 )			/* F5 F4 */ +	FMUL_S( MAT1 ) -	FLD_S( SRC(1) )			/* F0 F5 F4 */ -	FMUL_S( MAT(4) ) -	FLD_S( SRC(1) )			/* F1 F0 F5 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F0 F5 F4 */ +	FMUL_S( MAT4 ) +	FLD_S( SRC1 )			/* F1 F0 F5 F4 */ +	FMUL_S( MAT5 )  	FXCH( ST(1) )			/* F0 F1 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F1 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F5 F4 */ +	FADDP( ST0, ST(3) )		/* F1 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F5 F4 */  	FXCH( ST(1) )			/* F4 F5 */ -	FADD_S( MAT(12) ) +	FADD_S( MAT12 )  	FXCH( ST(1) )			/* F5 F4 */ -	FADD_S( MAT(13) ) +	FADD_S( MAT13 )  	FXCH( ST(1) )			/* F4 F5 */ -	FSTP_S( DST(0) )		/* F5 */ -	FSTP_S( DST(1) )		/* */ +	FSTP_S( DST0 )		/* F5 */ +	FSTP_S( DST1 )		/* */ -LLBL( x86_p2_2dr_skip ): +LLBL(x86_p2_2dr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p2_2dr_loop ) ) +	JNE( LLBL(x86_p2_2dr_loop) ) -LLBL( x86_p2_2dr_done ): +LLBL(x86_p2_2dr_done):  	POP_L( EDI )  	POP_L( ESI ) @@ -429,7 +456,7 @@ GLNAME( _mesa_x86_transform_points2_2d_no_rot ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( 
x86_p2_2dnrr_done ) ) +	JZ( LLBL(x86_p2_2dnrr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) @@ -444,31 +471,31 @@ GLNAME( _mesa_x86_transform_points2_2d_no_rot ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p2_2dnrr_loop ): +LLBL(x86_p2_2dnrr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) -	FLD_S( SRC(1) )			/* F1 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F1 F4 */ +	FMUL_S( MAT5 )  	FXCH( ST(1) )			/* F4 F1 */ -	FADD_S( MAT(12) ) -	FLD_S( MAT(13) )		/* F5 F4 F1 */ +	FADD_S( MAT12 ) +	FLD_S( MAT13 )		/* F5 F4 F1 */  	FXCH( ST(2) )			/* F1 F4 F5 */ -	FADDP( ST(0), ST(2) )		/* F4 F5 */ +	FADDP( ST0, ST(2) )		/* F4 F5 */ -	FSTP_S( DST(0)   )		/* F5 */ -	FSTP_S( DST(1)   )		/* */ +	FSTP_S( DST0   )		/* F5 */ +	FSTP_S( DST1   )		/* */ -LLBL( x86_p2_2dnrr_skip ): +LLBL(x86_p2_2dnrr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p2_2dnrr_loop ) ) +	JNE( LLBL(x86_p2_2dnrr_loop) ) -LLBL( x86_p2_2dnrr_done ): +LLBL(x86_p2_2dnrr_done):  	POP_L( EDI )  	POP_L( ESI ) @@ -494,7 +521,7 @@ GLNAME( _mesa_x86_transform_points2_identity ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p2_ir_done ) ) +	JZ( LLBL(x86_p2_ir_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) @@ -509,25 +536,25 @@ GLNAME( _mesa_x86_transform_points2_identity ):  	ADD_L( EDI, ECX )  	CMP_L( ESI, EDI ) -	JE( LLBL( x86_p2_ir_done ) ) +	JE( LLBL(x86_p2_ir_done) )  ALIGNTEXT16 -LLBL( x86_p2_ir_loop ): +LLBL(x86_p2_ir_loop): -	MOV_L( SRC(0), EBX ) -	MOV_L( SRC(1), EDX ) +	MOV_L( SRC0, EBX ) +	MOV_L( SRC1, EDX ) -	MOV_L( EBX, DST(0) ) -	MOV_L( EDX, DST(1) ) +	MOV_L( EBX, DST0 ) +	MOV_L( EDX, DST1 ) -LLBL( x86_p2_ir_skip ): +LLBL(x86_p2_ir_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p2_ir_loop ) ) +	JNE( LLBL(x86_p2_ir_loop) ) -LLBL( 
x86_p2_ir_done ): +LLBL(x86_p2_ir_done):  	POP_L( EBX )  	POP_L( EDI ) diff --git a/src/mesa/x86/x86_xform3.S b/src/mesa/x86/x86_xform3.S index 1f798744e8..d71a0fc3ba 100644 --- a/src/mesa/x86/x86_xform3.S +++ b/src/mesa/x86/x86_xform3.S @@ -1,4 +1,4 @@ -/* $Id: x86_xform3.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */ +/* $Id: x86_xform3.S,v 1.2 2002/03/07 21:40:08 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -24,6 +24,12 @@   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.   */ +/* + * NOTE: Avoid using spaces in between '(' ')' and arguments, especially + * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces + * in there will break the build on some platforms. + */ +  #include "matypes.h"  #include "xform_args.h" @@ -32,9 +38,30 @@  #define FP_ONE		1065353216  #define FP_ZERO		0 -#define SRC(i)		REGOFF(i * 4, ESI) -#define DST(i)		REGOFF(i * 4, EDI) -#define MAT(i)		REGOFF(i * 4, EDX) +#define SRC0		REGOFF(0, ESI) +#define SRC1		REGOFF(4, ESI) +#define SRC2		REGOFF(8, ESI) +#define SRC3		REGOFF(12, ESI) +#define DST0		REGOFF(0, EDI) +#define DST1		REGOFF(4, EDI) +#define DST2		REGOFF(8, EDI) +#define DST3		REGOFF(12, EDI) +#define MAT0		REGOFF(0, EDX) +#define MAT1		REGOFF(4, EDX) +#define MAT2		REGOFF(8, EDX) +#define MAT3		REGOFF(12, EDX) +#define MAT4		REGOFF(16, EDX) +#define MAT5		REGOFF(20, EDX) +#define MAT6		REGOFF(24, EDX) +#define MAT7		REGOFF(28, EDX) +#define MAT8		REGOFF(32, EDX) +#define MAT9		REGOFF(36, EDX) +#define MAT10		REGOFF(40, EDX) +#define MAT11		REGOFF(44, EDX) +#define MAT12		REGOFF(48, EDX) +#define MAT13		REGOFF(52, EDX) +#define MAT14		REGOFF(56, EDX) +#define MAT15		REGOFF(60, EDX)  ALIGNTEXT16 @@ -52,7 +79,7 @@ GLNAME( _mesa_x86_transform_points3_general ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p3_gr_done ) ) +	JZ( LLBL(x86_p3_gr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -67,73 
+94,73 @@ GLNAME( _mesa_x86_transform_points3_general ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p3_gr_loop ): +LLBL(x86_p3_gr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) -	FLD_S( SRC(0) )			/* F5 F4 */ -	FMUL_S( MAT(1) ) -	FLD_S( SRC(0) )			/* F6 F5 F4 */ -	FMUL_S( MAT(2) ) -	FLD_S( SRC(0) )			/* F7 F6 F5 F4 */ -	FMUL_S( MAT(3) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) +	FLD_S( SRC0 )			/* F5 F4 */ +	FMUL_S( MAT1 ) +	FLD_S( SRC0 )			/* F6 F5 F4 */ +	FMUL_S( MAT2 ) +	FLD_S( SRC0 )			/* F7 F6 F5 F4 */ +	FMUL_S( MAT3 ) -	FLD_S( SRC(1) )			/* F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(4) ) -	FLD_S( SRC(1) )			/* F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(5) ) -	FLD_S( SRC(1) )			/* F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(6) ) -	FLD_S( SRC(1) )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(7) ) +	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */ +	FMUL_S( MAT4 ) +	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT5 ) +	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT6 ) +	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT7 )  	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */  	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F7 F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */ -	FLD_S( SRC(2) )			/* F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(8) ) -	FLD_S( SRC(2) )			/* F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(9) ) -	FLD_S( SRC(2) )			/* F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(10) ) -	FLD_S( SRC(2) )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(11) ) +	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */ +	FMUL_S( MAT8 ) +	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT9 ) +	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT10 ) +	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ +	
FMUL_S( MAT11 )  	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */  	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F7 F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */  	FXCH( ST(3) )			/* F4 F6 F5 F7 */ -	FADD_S( MAT(12) ) +	FADD_S( MAT12 )  	FXCH( ST(2) )			/* F5 F6 F4 F7 */ -	FADD_S( MAT(13) ) +	FADD_S( MAT13 )  	FXCH( ST(1) )			/* F6 F5 F4 F7 */ -	FADD_S( MAT(14) ) +	FADD_S( MAT14 )  	FXCH( ST(3) )			/* F7 F5 F4 F6 */ -	FADD_S( MAT(15) ) +	FADD_S( MAT15 )  	FXCH( ST(2) )			/* F4 F5 F7 F6 */ -	FSTP_S( DST(0) )		/* F5 F7 F6 */ -	FSTP_S( DST(1) )		/* F7 F6 */ +	FSTP_S( DST0 )		/* F5 F7 F6 */ +	FSTP_S( DST1 )		/* F7 F6 */  	FXCH( ST(1) )			/* F6 F7 */ -	FSTP_S( DST(2) )		/* F7 */ -	FSTP_S( DST(3) )		/* */ +	FSTP_S( DST2 )		/* F7 */ +	FSTP_S( DST3 )		/* */ -LLBL( x86_p3_gr_skip ): +LLBL(x86_p3_gr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p3_gr_loop ) ) +	JNE( LLBL(x86_p3_gr_loop) ) -LLBL( x86_p3_gr_done ): +LLBL(x86_p3_gr_done):  	POP_L( EDI )  	POP_L( ESI ) @@ -159,7 +186,7 @@ GLNAME( _mesa_x86_transform_points3_perspective ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p3_pr_done ) ) +	JZ( LLBL(x86_p3_pr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -174,45 +201,45 @@ GLNAME( _mesa_x86_transform_points3_perspective ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p3_pr_loop ): +LLBL(x86_p3_pr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) -	FLD_S( SRC(1) )			/* F5 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F5 F4 */ +	FMUL_S( MAT5 ) -	FLD_S( SRC(2) )			/* F0 F5 F4 */ -	
FMUL_S( MAT(8) ) -	FLD_S( SRC(2) )			/* F1 F0 F5 F4 */ -	FMUL_S( MAT(9) ) -	FLD_S( SRC(2) )			/* F2 F1 F0 F5 F4 */ -	FMUL_S( MAT(10) ) +	FLD_S( SRC2 )			/* F0 F5 F4 */ +	FMUL_S( MAT8 ) +	FLD_S( SRC2 )			/* F1 F0 F5 F4 */ +	FMUL_S( MAT9 ) +	FLD_S( SRC2 )			/* F2 F1 F0 F5 F4 */ +	FMUL_S( MAT10 )  	FXCH( ST(2) )			/* F0 F1 F2 F5 F4 */ -	FADDP( ST(0), ST(4) )		/* F1 F2 F5 F4 */ -	FADDP( ST(0), ST(2) )		/* F2 F5 F4 */ -	FLD_S( MAT(14) )		/* F6 F2 F5 F4 */ +	FADDP( ST0, ST(4) )		/* F1 F2 F5 F4 */ +	FADDP( ST0, ST(2) )		/* F2 F5 F4 */ +	FLD_S( MAT14 )		/* F6 F2 F5 F4 */  	FXCH( ST(1) )			/* F2 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F6 F5 F4 */ -	MOV_L( SRC(2), EBX ) +	MOV_L( SRC2, EBX )  	XOR_L( CONST(-2147483648), EBX )/* change sign */  	FXCH( ST(2) )			/* F4 F5 F6 */ -	FSTP_S( DST(0) )		/* F5 F6 */ -	FSTP_S( DST(1) )		/* F6 */ -	FSTP_S( DST(2) )		/* */ -	MOV_L( EBX, DST(3) ) +	FSTP_S( DST0 )		/* F5 F6 */ +	FSTP_S( DST1 )		/* F6 */ +	FSTP_S( DST2 )		/* */ +	MOV_L( EBX, DST3 ) -LLBL( x86_p3_pr_skip ): +LLBL(x86_p3_pr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p3_pr_loop ) ) +	JNE( LLBL(x86_p3_pr_loop) ) -LLBL( x86_p3_pr_done ): +LLBL(x86_p3_pr_done):  	POP_L( EBX )  	POP_L( EDI ) @@ -238,7 +265,7 @@ GLNAME( _mesa_x86_transform_points3_3d ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p3_3dr_done ) ) +	JZ( LLBL(x86_p3_3dr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) @@ -253,60 +280,60 @@ GLNAME( _mesa_x86_transform_points3_3d ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p3_3dr_loop ): +LLBL(x86_p3_3dr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) -	FLD_S( SRC(0) )			/* F5 F4 */ -	FMUL_S( MAT(1) ) -	FLD_S( SRC(0) )			/* F6 F5 F4 */ -	FMUL_S( MAT(2) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) +	FLD_S( SRC0 )			/* F5 F4 */ +	FMUL_S( MAT1 ) +	FLD_S( SRC0 )			/* F6 F5 F4 */ +	FMUL_S( MAT2 ) -	
FLD_S( SRC(1) )			/* F0 F6 F5 F4 */ -	FMUL_S( MAT(4) ) -	FLD_S( SRC(1) )			/* F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(5) ) -	FLD_S( SRC(1) )			/* F2 F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(6) ) +	FLD_S( SRC1 )			/* F0 F6 F5 F4 */ +	FMUL_S( MAT4 ) +	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */ +	FMUL_S( MAT5 ) +	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */ +	FMUL_S( MAT6 )  	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F2 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F6 F5 F4 */ -	FLD_S( SRC(2) )			/* F0 F6 F5 F4 */ -	FMUL_S( MAT(8) ) -	FLD_S( SRC(2) )			/* F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(9) ) -	FLD_S( SRC(2) )			/* F2 F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(10) ) +	FLD_S( SRC2 )			/* F0 F6 F5 F4 */ +	FMUL_S( MAT8 ) +	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */ +	FMUL_S( MAT9 ) +	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */ +	FMUL_S( MAT10 )  	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F2 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F6 F5 F4 */  	FXCH( ST(2) )			/* F4 F5 F6 */ -	FADD_S( MAT(12) ) +	FADD_S( MAT12 )  	FXCH( ST(1) )			/* F5 F4 F6 */ -	FADD_S( MAT(13) ) +	FADD_S( MAT13 )  	FXCH( ST(2) )			/* F6 F4 F5 */ -	FADD_S( MAT(14) ) +	FADD_S( MAT14 )  	FXCH( ST(1) )			/* F4 F6 F5 */ -	FSTP_S( DST(0)   )		/* F6 F5 */ +	FSTP_S( DST0   )		/* F6 F5 */  	FXCH( ST(1) )			/* F5 F6 */ -	FSTP_S( DST(1)   )		/* F6 */ -	FSTP_S( DST(2)   )		/* */ +	FSTP_S( DST1   )		/* F6 */ +	FSTP_S( DST2   )		/* */ -LLBL( x86_p3_3dr_skip ): +LLBL(x86_p3_3dr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p3_3dr_loop ) ) +	JNE( LLBL(x86_p3_3dr_loop) ) -LLBL( x86_p3_3dr_done ): +LLBL(x86_p3_3dr_done):  	POP_L( EDI )  	POP_L( ESI ) 
@@ -332,7 +359,7 @@ GLNAME( _mesa_x86_transform_points3_3d_no_rot ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p3_3dnrr_done ) ) +	JZ( LLBL(x86_p3_3dnrr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) @@ -347,38 +374,38 @@ GLNAME( _mesa_x86_transform_points3_3d_no_rot ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p3_3dnrr_loop ): +LLBL(x86_p3_3dnrr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) -	FLD_S( SRC(1) )			/* F1 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F1 F4 */ +	FMUL_S( MAT5 ) -	FLD_S( SRC(2) )			/* F2 F1 F4 */ -	FMUL_S( MAT(10) ) +	FLD_S( SRC2 )			/* F2 F1 F4 */ +	FMUL_S( MAT10 )  	FXCH( ST(2) )			/* F4 F1 F2 */ -	FADD_S( MAT(12) ) -	FLD_S( MAT(13) )		/* F5 F4 F1 F2 */ +	FADD_S( MAT12 ) +	FLD_S( MAT13 )		/* F5 F4 F1 F2 */  	FXCH( ST(2) )			/* F1 F4 F5 F2 */ -	FADDP( ST(0), ST(2) )		/* F4 F5 F2 */ -	FLD_S( MAT(14) )		/* F6 F4 F5 F2 */ +	FADDP( ST0, ST(2) )		/* F4 F5 F2 */ +	FLD_S( MAT14 )		/* F6 F4 F5 F2 */  	FXCH( ST(3) )			/* F2 F4 F5 F6 */ -	FADDP( ST(0), ST(3) )		/* F4 F5 F6 */ +	FADDP( ST0, ST(3) )		/* F4 F5 F6 */ -	FSTP_S( DST(0)   )		/* F5 F6 */ -	FSTP_S( DST(1)   )		/* F6 */ -	FSTP_S( DST(2)   )		/* */ +	FSTP_S( DST0   )		/* F5 F6 */ +	FSTP_S( DST1   )		/* F6 */ +	FSTP_S( DST2   )		/* */ -LLBL( x86_p3_3dnrr_skip ): +LLBL(x86_p3_3dnrr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p3_3dnrr_loop ) ) +	JNE( LLBL(x86_p3_3dnrr_loop) ) -LLBL( x86_p3_3dnrr_done ): +LLBL(x86_p3_3dnrr_done):  	POP_L( EDI )  	POP_L( ESI ) @@ -404,7 +431,7 @@ GLNAME( _mesa_x86_transform_points3_2d ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p3_2dr_done ) ) +	JZ( LLBL(x86_p3_2dr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) @@ -419,42 +446,42 @@ GLNAME( _mesa_x86_transform_points3_2d ):  	ADD_L( EDI, ECX ) 
 ALIGNTEXT16 -LLBL( x86_p3_2dr_loop ): +LLBL(x86_p3_2dr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) -	FLD_S( SRC(0) )			/* F5 F4 */ -	FMUL_S( MAT(1) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) +	FLD_S( SRC0 )			/* F5 F4 */ +	FMUL_S( MAT1 ) -	FLD_S( SRC(1) )			/* F0 F5 F4 */ -	FMUL_S( MAT(4) ) -	FLD_S( SRC(1) )			/* F1 F0 F5 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F0 F5 F4 */ +	FMUL_S( MAT4 ) +	FLD_S( SRC1 )			/* F1 F0 F5 F4 */ +	FMUL_S( MAT5 )  	FXCH( ST(1) )			/* F0 F1 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F1 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F5 F4 */ +	FADDP( ST0, ST(3) )		/* F1 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F5 F4 */  	FXCH( ST(1) )			/* F4 F5 */ -	FADD_S( MAT(12) ) +	FADD_S( MAT12 )  	FXCH( ST(1) )			/* F5 F4 */ -	FADD_S( MAT(13) ) +	FADD_S( MAT13 ) -	MOV_L( SRC(2), EBX ) +	MOV_L( SRC2, EBX )  	FXCH( ST(1) )			/* F4 F5 */ -	FSTP_S( DST(0)   )		/* F5 */ -	FSTP_S( DST(1)   )		/* */ -	MOV_L( EBX, DST(2) ) +	FSTP_S( DST0   )		/* F5 */ +	FSTP_S( DST1   )		/* */ +	MOV_L( EBX, DST2 ) -LLBL( x86_p3_2dr_skip ): +LLBL(x86_p3_2dr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p3_2dr_loop ) ) +	JNE( LLBL(x86_p3_2dr_loop) ) -LLBL( x86_p3_2dr_done ): +LLBL(x86_p3_2dr_done):  	POP_L( EBX )  	POP_L( EDI ) @@ -481,7 +508,7 @@ GLNAME( _mesa_x86_transform_points3_2d_no_rot ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p3_2dnrr_done ) ) +	JZ( LLBL(x86_p3_2dnrr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) @@ -496,35 +523,35 @@ GLNAME( _mesa_x86_transform_points3_2d_no_rot ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p3_2dnrr_loop ): +LLBL(x86_p3_2dnrr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) -	FLD_S( SRC(1) )			/* F1 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F1 F4 */ +	FMUL_S( MAT5 )  	FXCH( ST(1) )			/* F4 F1 */ -	FADD_S( MAT(12) ) -	FLD_S( MAT(13) )		/* F5 F4 F1 
*/ +	FADD_S( MAT12 ) +	FLD_S( MAT13 )		/* F5 F4 F1 */  	FXCH( ST(2) )			/* F1 F4 F5 */ -	FADDP( ST(0), ST(2) )		/* F4 F5 */ +	FADDP( ST0, ST(2) )		/* F4 F5 */ -	MOV_L( SRC(2), EBX ) +	MOV_L( SRC2, EBX ) -	FSTP_S( DST(0) )		/* F5 */ -	FSTP_S( DST(1) )		/* */ -	MOV_L( EBX, DST(2) ) +	FSTP_S( DST0 )		/* F5 */ +	FSTP_S( DST1 )		/* */ +	MOV_L( EBX, DST2 ) -LLBL( x86_p3_2dnrr_skip ): +LLBL(x86_p3_2dnrr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p3_2dnrr_loop ) ) +	JNE( LLBL(x86_p3_2dnrr_loop) ) -LLBL( x86_p3_2dnrr_done ): +LLBL(x86_p3_2dnrr_done):  	POP_L( EBX )  	POP_L( EDI ) @@ -552,7 +579,7 @@ GLNAME(_mesa_x86_transform_points3_identity ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p3_ir_done ) ) +	JZ( LLBL(x86_p3_ir_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) @@ -567,37 +594,37 @@ GLNAME(_mesa_x86_transform_points3_identity ):  	ADD_L( EDI, ECX )  	CMP_L( ESI, EDI ) -	JE( LLBL( x86_p3_ir_done ) ) +	JE( LLBL(x86_p3_ir_done) )  ALIGNTEXT16 -LLBL( x86_p3_ir_loop ): +LLBL(x86_p3_ir_loop):  #if 1 -	MOV_L( SRC(0), EBX ) -	MOV_L( SRC(1), EBP ) -	MOV_L( SRC(2), EDX ) +	MOV_L( SRC0, EBX ) +	MOV_L( SRC1, EBP ) +	MOV_L( SRC2, EDX ) -	MOV_L( EBX, DST(0) ) -	MOV_L( EBP, DST(1) ) -	MOV_L( EDX, DST(2) ) +	MOV_L( EBX, DST0 ) +	MOV_L( EBP, DST1 ) +	MOV_L( EDX, DST2 )  #else -	FLD_S( SRC(0) ) -	FLD_S( SRC(1) ) -	FLD_S( SRC(2) ) +	FLD_S( SRC0 ) +	FLD_S( SRC1 ) +	FLD_S( SRC2 ) -	FSTP_S( DST(2) ) -	FSTP_S( DST(1) ) -	FSTP_S( DST(0) ) +	FSTP_S( DST2 ) +	FSTP_S( DST1 ) +	FSTP_S( DST0 )  #endif -LLBL( x86_p3_ir_skip ): +LLBL(x86_p3_ir_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p3_ir_loop ) ) +	JNE( LLBL(x86_p3_ir_loop) ) -LLBL( x86_p3_ir_done ): +LLBL(x86_p3_ir_done):  	POP_L( EBP )  	POP_L( EBX ) diff --git a/src/mesa/x86/x86_xform4.S b/src/mesa/x86/x86_xform4.S index 1b6a8d8aec..a5c1c1c4b0 100644 --- 
a/src/mesa/x86/x86_xform4.S +++ b/src/mesa/x86/x86_xform4.S @@ -1,4 +1,4 @@ -/* $Id: x86_xform4.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */ +/* $Id: x86_xform4.S,v 1.2 2002/03/07 21:40:08 brianp Exp $ */  /*   * Mesa 3-D graphics library @@ -24,6 +24,12 @@   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.   */ +/* + * NOTE: Avoid using spaces in between '(' ')' and arguments, especially + * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces + * in there will break the build on some platforms. + */ +  #include "matypes.h"  #include "xform_args.h" @@ -32,9 +38,30 @@  #define FP_ONE		1065353216  #define FP_ZERO		0 -#define SRC(i)		REGOFF(i * 4, ESI) -#define DST(i)		REGOFF(i * 4, EDI) -#define MAT(i)		REGOFF(i * 4, EDX) +#define SRC0		REGOFF(0, ESI) +#define SRC1		REGOFF(4, ESI) +#define SRC2		REGOFF(8, ESI) +#define SRC3		REGOFF(12, ESI) +#define DST0		REGOFF(0, EDI) +#define DST1		REGOFF(4, EDI) +#define DST2		REGOFF(8, EDI) +#define DST3		REGOFF(12, EDI) +#define MAT0		REGOFF(0, EDX) +#define MAT1		REGOFF(4, EDX) +#define MAT2		REGOFF(8, EDX) +#define MAT3		REGOFF(12, EDX) +#define MAT4		REGOFF(16, EDX) +#define MAT5		REGOFF(20, EDX) +#define MAT6		REGOFF(24, EDX) +#define MAT7		REGOFF(28, EDX) +#define MAT8		REGOFF(32, EDX) +#define MAT9		REGOFF(36, EDX) +#define MAT10		REGOFF(40, EDX) +#define MAT11		REGOFF(44, EDX) +#define MAT12		REGOFF(48, EDX) +#define MAT13		REGOFF(52, EDX) +#define MAT14		REGOFF(56, EDX) +#define MAT15		REGOFF(60, EDX)  ALIGNTEXT16 @@ -52,7 +79,7 @@ GLNAME( _mesa_x86_transform_points4_general ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p4_gr_done ) ) +	JZ( LLBL(x86_p4_gr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -67,80 +94,80 @@ GLNAME( _mesa_x86_transform_points4_general ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p4_gr_loop ): +LLBL(x86_p4_gr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( 
MAT(0) ) -	FLD_S( SRC(0) )			/* F5 F4 */ -	FMUL_S( MAT(1) ) -	FLD_S( SRC(0) )			/* F6 F5 F4 */ -	FMUL_S( MAT(2) ) -	FLD_S( SRC(0) )			/* F7 F6 F5 F4 */ -	FMUL_S( MAT(3) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) +	FLD_S( SRC0 )			/* F5 F4 */ +	FMUL_S( MAT1 ) +	FLD_S( SRC0 )			/* F6 F5 F4 */ +	FMUL_S( MAT2 ) +	FLD_S( SRC0 )			/* F7 F6 F5 F4 */ +	FMUL_S( MAT3 ) -	FLD_S( SRC(1) )			/* F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(4) ) -	FLD_S( SRC(1) )			/* F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(5) ) -	FLD_S( SRC(1) )			/* F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(6) ) -	FLD_S( SRC(1) )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(7) ) +	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */ +	FMUL_S( MAT4 ) +	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT5 ) +	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT6 ) +	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT7 )  	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */  	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F7 F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */ -	FLD_S( SRC(2) )			/* F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(8) ) -	FLD_S( SRC(2) )			/* F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(9) ) -	FLD_S( SRC(2) )			/* F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(10) ) -	FLD_S( SRC(2) )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(11) ) +	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */ +	FMUL_S( MAT8 ) +	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT9 ) +	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT10 ) +	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT11 )  	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */  	FXCH( ST(1) )	
		/* F1 F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F7 F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */ -	FLD_S( SRC(3) )			/* F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(12) ) -	FLD_S( SRC(3) )			/* F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(13) ) -	FLD_S( SRC(3) )			/* F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(14) ) -	FLD_S( SRC(3) )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ -	FMUL_S( MAT(15) ) +	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */ +	FMUL_S( MAT12 ) +	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT13 ) +	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT14 ) +	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */ +	FMUL_S( MAT15 )  	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */  	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F2 F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F3 F7 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F7 F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */  	FXCH( ST(3) )			/* F4 F6 F5 F7 */ -	FSTP_S( DST(0) )		/* F6 F5 F7 */ +	FSTP_S( DST0 )		/* F6 F5 F7 */  	FXCH( ST(1) )			/* F5 F6 F7 */ -	FSTP_S( DST(1) )		/* F6 F7 */ -	FSTP_S( DST(2) )		/* F7 */ -	FSTP_S( DST(3) )		/* */ +	FSTP_S( DST1 )		/* F6 F7 */ +	FSTP_S( DST2 )		/* F7 */ +	FSTP_S( DST3 )		/* */ -LLBL( x86_p4_gr_skip ): +LLBL(x86_p4_gr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p4_gr_loop ) ) +	JNE( LLBL(x86_p4_gr_loop) ) -LLBL( x86_p4_gr_done ): +LLBL(x86_p4_gr_done):  	POP_L( EDI )  	POP_L( ESI ) @@ -166,7 +193,7 @@ GLNAME( _mesa_x86_transform_points4_perspective ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p4_pr_done ) ) +	JZ( 
LLBL(x86_p4_pr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -181,47 +208,47 @@ GLNAME( _mesa_x86_transform_points4_perspective ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p4_pr_loop ): +LLBL(x86_p4_pr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) -	FLD_S( SRC(1) )			/* F5 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F5 F4 */ +	FMUL_S( MAT5 ) -	FLD_S( SRC(2) )			/* F0 F5 F4 */ -	FMUL_S( MAT(8) ) -	FLD_S( SRC(2) )			/* F1 F0 F5 F4 */ -	FMUL_S( MAT(9) ) -	FLD_S( SRC(2) )			/* F6 F1 F0 F5 F4 */ -	FMUL_S( MAT(10) ) +	FLD_S( SRC2 )			/* F0 F5 F4 */ +	FMUL_S( MAT8 ) +	FLD_S( SRC2 )			/* F1 F0 F5 F4 */ +	FMUL_S( MAT9 ) +	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */ +	FMUL_S( MAT10 )  	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */ -	FADDP( ST(0), ST(4) )		/* F1 F6 F5 F4 */ -	FADDP( ST(0), ST(2) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */ +	FADDP( ST0, ST(2) )		/* F6 F5 F4 */ -	FLD_S( SRC(3) )			/* F2 F6 F5 F4 */ -	FMUL_S( MAT(14) ) +	FLD_S( SRC3 )			/* F2 F6 F5 F4 */ +	FMUL_S( MAT14 ) -	FADDP( ST(0), ST(1) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F6 F5 F4 */ -	MOV_L( SRC(2), EBX ) +	MOV_L( SRC2, EBX )  	XOR_L( CONST(-2147483648), EBX )/* change sign */  	FXCH( ST(2) )			/* F4 F5 F6 */ -	FSTP_S( DST(0) )		/* F5 F6 */ -	FSTP_S( DST(1) )		/* F6 */ -	FSTP_S( DST(2) )		/* */ -	MOV_L( EBX, DST(3) ) +	FSTP_S( DST0 )		/* F5 F6 */ +	FSTP_S( DST1 )		/* F6 */ +	FSTP_S( DST2 )		/* */ +	MOV_L( EBX, DST3 ) -LLBL( x86_p4_pr_skip ): +LLBL(x86_p4_pr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p4_pr_loop ) ) +	JNE( LLBL(x86_p4_pr_loop) ) -LLBL( x86_p4_pr_done ): +LLBL(x86_p4_pr_done):  	POP_L( EBX )  	POP_L( EDI ) @@ -248,7 +275,7 @@ GLNAME( _mesa_x86_transform_points4_3d ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p4_3dr_done ) ) +	JZ( LLBL(x86_p4_3dr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX 
)  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -263,67 +290,67 @@ GLNAME( _mesa_x86_transform_points4_3d ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p4_3dr_loop ): +LLBL(x86_p4_3dr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) -	FLD_S( SRC(0) )			/* F5 F4 */ -	FMUL_S( MAT(1) ) -	FLD_S( SRC(0) )			/* F6 F5 F4 */ -	FMUL_S( MAT(2) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) +	FLD_S( SRC0 )			/* F5 F4 */ +	FMUL_S( MAT1 ) +	FLD_S( SRC0 )			/* F6 F5 F4 */ +	FMUL_S( MAT2 ) -	FLD_S( SRC(1) )			/* F0 F6 F5 F4 */ -	FMUL_S( MAT(4) ) -	FLD_S( SRC(1) )			/* F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(5) ) -	FLD_S( SRC(1) )			/* F2 F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(6) ) +	FLD_S( SRC1 )			/* F0 F6 F5 F4 */ +	FMUL_S( MAT4 ) +	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */ +	FMUL_S( MAT5 ) +	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */ +	FMUL_S( MAT6 )  	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F2 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F6 F5 F4 */ -	FLD_S( SRC(2) )			/* F0 F6 F5 F4 */ -	FMUL_S( MAT(8) ) -	FLD_S( SRC(2) )			/* F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(9) ) -	FLD_S( SRC(2) )			/* F2 F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(10) ) +	FLD_S( SRC2 )			/* F0 F6 F5 F4 */ +	FMUL_S( MAT8 ) +	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */ +	FMUL_S( MAT9 ) +	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */ +	FMUL_S( MAT10 )  	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F2 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F6 F5 F4 */ -	FLD_S( SRC(3) )			/* F0 F6 F5 F4 */ -	FMUL_S( MAT(12) ) -	FLD_S( SRC(3) )			/* F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(13) ) -	FLD_S( SRC(3) )			/* F2 F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(14) ) +	FLD_S( SRC3 )			/* F0 F6 F5 F4 */ 
+	FMUL_S( MAT12 ) +	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */ +	FMUL_S( MAT13 ) +	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */ +	FMUL_S( MAT14 )  	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F2 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F6 F5 F4 */ -	MOV_L( SRC(3), EBX ) +	MOV_L( SRC3, EBX )  	FXCH( ST(2) )			/* F4 F5 F6 */ -	FSTP_S( DST(0) )		/* F5 F6 */ -	FSTP_S( DST(1) )		/* F6 */ -	FSTP_S( DST(2) )		/* */ -	MOV_L( EBX, DST(3) ) +	FSTP_S( DST0 )		/* F5 F6 */ +	FSTP_S( DST1 )		/* F6 */ +	FSTP_S( DST2 )		/* */ +	MOV_L( EBX, DST3 ) -LLBL( x86_p4_3dr_skip ): +LLBL(x86_p4_3dr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p4_3dr_loop ) ) +	JNE( LLBL(x86_p4_3dr_loop) ) -LLBL( x86_p4_3dr_done ): +LLBL(x86_p4_3dr_done):  	POP_L( EBX )  	POP_L( EDI ) @@ -350,7 +377,7 @@ GLNAME(_mesa_x86_transform_points4_3d_no_rot):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p4_3dnrr_done ) ) +	JZ( LLBL(x86_p4_3dnrr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -365,45 +392,45 @@ GLNAME(_mesa_x86_transform_points4_3d_no_rot):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p4_3dnrr_loop ): +LLBL(x86_p4_3dnrr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) -	FLD_S( SRC(1) )			/* F5 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F5 F4 */ +	FMUL_S( MAT5 ) -	FLD_S( SRC(2) )			/* F6 F5 F4 */ -	FMUL_S( MAT(10) ) +	FLD_S( SRC2 )			/* F6 F5 F4 */ +	FMUL_S( MAT10 ) -	FLD_S( SRC(3) )			/* F0 F6 F5 F4 */ -	FMUL_S( MAT(12) ) -	FLD_S( SRC(3) )			/* F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(13) ) -	FLD_S( SRC(3) )			/* F2 F1 F0 F6 F5 F4 */ -	FMUL_S( MAT(14) ) +	FLD_S( SRC3 )			/* F0 F6 F5 F4 */ +	FMUL_S( MAT12 ) +	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */ +	
FMUL_S( MAT13 ) +	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */ +	FMUL_S( MAT14 )  	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(5) )		/* F1 F2 F6 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F2 F6 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F6 F5 F4 */ +	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */ +	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F6 F5 F4 */ -	MOV_L( SRC(3), EBX ) +	MOV_L( SRC3, EBX )  	FXCH( ST(2) )			/* F4 F5 F6 */ -	FSTP_S( DST(0)   )		/* F5 F6 */ -	FSTP_S( DST(1)   )		/* F6 */ -	FSTP_S( DST(2)   )		/* */ -	MOV_L( EBX, DST(3) ) +	FSTP_S( DST0   )		/* F5 F6 */ +	FSTP_S( DST1   )		/* F6 */ +	FSTP_S( DST2   )		/* */ +	MOV_L( EBX, DST3 ) -LLBL( x86_p4_3dnrr_skip ): +LLBL(x86_p4_3dnrr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p4_3dnrr_loop ) ) +	JNE( LLBL(x86_p4_3dnrr_loop) ) -LLBL( x86_p4_3dnrr_done ): +LLBL(x86_p4_3dnrr_done):  	POP_L( EBX )  	POP_L( EDI ) @@ -431,7 +458,7 @@ GLNAME( _mesa_x86_transform_points4_2d ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p4_2dr_done ) ) +	JZ( LLBL(x86_p4_2dr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -446,48 +473,48 @@ GLNAME( _mesa_x86_transform_points4_2d ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p4_2dr_loop ): +LLBL(x86_p4_2dr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) -	FLD_S( SRC(0) )			/* F5 F4 */ -	FMUL_S( MAT(1) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) +	FLD_S( SRC0 )			/* F5 F4 */ +	FMUL_S( MAT1 ) -	FLD_S( SRC(1) )			/* F0 F5 F4 */ -	FMUL_S( MAT(4) ) -	FLD_S( SRC(1) )			/* F1 F0 F5 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F0 F5 F4 */ +	FMUL_S( MAT4 ) +	FLD_S( SRC1 )			/* F1 F0 F5 F4 */ +	FMUL_S( MAT5 )  	FXCH( ST(1) )			/* F0 F1 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F1 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F5 F4 */ +	FADDP( ST0, ST(3) )		/* F1 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F5 F4 */ -	FLD_S( SRC(3) )			/* F0 F5 F4 */ -	
FMUL_S( MAT(12) ) -	FLD_S( SRC(3) )			/* F1 F0 F5 F4 */ -	FMUL_S( MAT(13) ) +	FLD_S( SRC3 )			/* F0 F5 F4 */ +	FMUL_S( MAT12 ) +	FLD_S( SRC3 )			/* F1 F0 F5 F4 */ +	FMUL_S( MAT13 )  	FXCH( ST(1) )			/* F0 F1 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F1 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F5 F4 */ +	FADDP( ST0, ST(3) )		/* F1 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F5 F4 */ -	MOV_L( SRC(2), EBX ) -	MOV_L( SRC(3), EBP ) +	MOV_L( SRC2, EBX ) +	MOV_L( SRC3, EBP )  	FXCH( ST(1) )			/* F4 F5 */ -	FSTP_S( DST(0) )		/* F5 */ -	FSTP_S( DST(1) )		/* */ -	MOV_L( EBX, DST(2) ) -	MOV_L( EBP, DST(3) ) +	FSTP_S( DST0 )		/* F5 */ +	FSTP_S( DST1 )		/* */ +	MOV_L( EBX, DST2 ) +	MOV_L( EBP, DST3 ) -LLBL( x86_p4_2dr_skip ): +LLBL(x86_p4_2dr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p4_2dr_loop ) ) +	JNE( LLBL(x86_p4_2dr_loop) ) -LLBL( x86_p4_2dr_done ): +LLBL(x86_p4_2dr_done):  	POP_L( EBP )  	POP_L( EBX ) @@ -516,7 +543,7 @@ GLNAME( _mesa_x86_transform_points4_2d_no_rot ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p4_2dnrr_done ) ) +	JZ( LLBL(x86_p4_2dnrr_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -531,40 +558,40 @@ GLNAME( _mesa_x86_transform_points4_2d_no_rot ):  	ADD_L( EDI, ECX )  ALIGNTEXT16 -LLBL( x86_p4_2dnrr_loop ): +LLBL(x86_p4_2dnrr_loop): -	FLD_S( SRC(0) )			/* F4 */ -	FMUL_S( MAT(0) ) +	FLD_S( SRC0 )			/* F4 */ +	FMUL_S( MAT0 ) -	FLD_S( SRC(1) )			/* F5 F4 */ -	FMUL_S( MAT(5) ) +	FLD_S( SRC1 )			/* F5 F4 */ +	FMUL_S( MAT5 ) -	FLD_S( SRC(3) )			/* F0 F5 F4 */ -	FMUL_S( MAT(12) ) -	FLD_S( SRC(3) )			/* F1 F0 F5 F4 */ -	FMUL_S( MAT(13) ) +	FLD_S( SRC3 )			/* F0 F5 F4 */ +	FMUL_S( MAT12 ) +	FLD_S( SRC3 )			/* F1 F0 F5 F4 */ +	FMUL_S( MAT13 )  	FXCH( ST(1) )			/* F0 F1 F5 F4 */ -	FADDP( ST(0), ST(3) )		/* F1 F5 F4 */ -	FADDP( ST(0), ST(1) )		/* F5 F4 */ +	FADDP( ST0, ST(3) )		/* F1 F5 F4 */ +	FADDP( ST0, ST(1) )		/* F5 F4 */ -	MOV_L( SRC(2), EBX 
) -	MOV_L( SRC(3), EBP ) +	MOV_L( SRC2, EBX ) +	MOV_L( SRC3, EBP )  	FXCH( ST(1) )			/* F4 F5 */ -	FSTP_S( DST(0)   )		/* F5 */ -	FSTP_S( DST(1)   )		/* */ -	MOV_L( EBX, DST(2) ) -	MOV_L( EBP, DST(3) ) +	FSTP_S( DST0   )		/* F5 */ +	FSTP_S( DST1   )		/* */ +	MOV_L( EBX, DST2 ) +	MOV_L( EBP, DST3 ) -LLBL( x86_p4_2dnrr_skip ): +LLBL(x86_p4_2dnrr_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p4_2dnrr_loop ) ) +	JNE( LLBL(x86_p4_2dnrr_loop) ) -LLBL( x86_p4_2dnrr_done ): +LLBL(x86_p4_2dnrr_done):  	POP_L( EBP )  	POP_L( EBX ) @@ -592,7 +619,7 @@ GLNAME( _mesa_x86_transform_points4_identity ):  	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )  	TEST_L( ECX, ECX ) -	JZ( LLBL( x86_p4_ir_done ) ) +	JZ( LLBL(x86_p4_ir_done) )  	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )  	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) @@ -607,31 +634,31 @@ GLNAME( _mesa_x86_transform_points4_identity ):  	ADD_L( EDI, ECX )  	CMP_L( ESI, EDI ) -	JE( LLBL( x86_p4_ir_done ) ) +	JE( LLBL(x86_p4_ir_done) )  ALIGNTEXT16 -LLBL( x86_p4_ir_loop ): +LLBL(x86_p4_ir_loop): -	MOV_L( SRC(0), EBX ) -	MOV_L( SRC(1), EDX ) +	MOV_L( SRC0, EBX ) +	MOV_L( SRC1, EDX ) -	MOV_L( EBX, DST(0) ) -	MOV_L( EDX, DST(1) ) +	MOV_L( EBX, DST0 ) +	MOV_L( EDX, DST1 ) -	MOV_L( SRC(2), EBX ) -	MOV_L( SRC(3), EDX ) +	MOV_L( SRC2, EBX ) +	MOV_L( SRC3, EDX ) -	MOV_L( EBX, DST(2) ) -	MOV_L( EDX, DST(3) ) +	MOV_L( EBX, DST2 ) +	MOV_L( EDX, DST3 ) -LLBL( x86_p4_ir_skip ): +LLBL(x86_p4_ir_skip):  	ADD_L( CONST(16), EDI )  	ADD_L( EAX, ESI )  	CMP_L( ECX, EDI ) -	JNE( LLBL( x86_p4_ir_loop ) ) +	JNE( LLBL(x86_p4_ir_loop) ) -LLBL( x86_p4_ir_done ): +LLBL(x86_p4_ir_done):  	POP_L( EBX )  	POP_L( EDI ) | 
