diff options
-rwxr-xr-x | src/mesa/pipe/tgsi/exec/tgsi_sse2.c | 2690 | ||||
-rw-r--r-- | src/mesa/x86/rtasm/x86sse.c | 54 | ||||
-rw-r--r-- | src/mesa/x86/rtasm/x86sse.h | 7 |
3 files changed, 1400 insertions, 1351 deletions
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c index 36d9b86f75..359775fdfb 100755 --- a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c +++ b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c @@ -1,7 +1,8 @@ #include "tgsi_platform.h" #include "tgsi_core.h" +#include "x86/rtasm/x86sse.h" -#if defined USE_X86_ASM +#if defined(USE_X86_ASM) || defined(SLANG_X86) #define FOR_EACH_CHANNEL( CHAN )\ for( CHAN = 0; CHAN < 4; CHAN++ ) @@ -181,148 +182,257 @@ emit_tempf( } static void -emit_temps (struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan) +emit_temps ( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) { - sse_movaps (func, - get_temp (vec, chan), - make_xmm (xmm)); + sse_movaps( + func, + get_temp( vec, chan ), + make_xmm( xmm ) ); } static void -emit_addrf( struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) +emit_addrf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) { - emit_tempf( func, xmm, vec + TGSI_EXEC_NUM_TEMPS, chan ); + emit_tempf( + func, + xmm, + vec + TGSI_EXEC_NUM_TEMPS, + chan ); } static void -emit_addrs( struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) +emit_addrs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) { - emit_temps( func, xmm, vec + TGSI_EXEC_NUM_TEMPS, chan ); + emit_temps( + func, + xmm, + vec + TGSI_EXEC_NUM_TEMPS, + chan ); } static void -emit_abs (struct x86_function *func, - unsigned xmm) +emit_abs( + struct x86_function *func, + unsigned xmm ) { - sse_andps (func, - make_xmm (xmm), - get_temp (TGSI_EXEC_TEMP_7FFFFFFF_I, - TGSI_EXEC_TEMP_7FFFFFFF_C)); + sse_andps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_7FFFFFFF_I, + TGSI_EXEC_TEMP_7FFFFFFF_C ) ); } static void -emit_neg (struct x86_function *func, - unsigned xmm) +emit_neg( + struct x86_function *func, + unsigned xmm ) { - sse_xorps (func, - make_xmm (xmm), - get_temp (TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C)); + sse_xorps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); } static void -emit_setsign (struct x86_function *func, - unsigned xmm) +emit_setsign( + struct x86_function *func, + unsigned xmm ) { - sse_orps (func, - make_xmm (xmm), - get_temp (TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C)); + sse_orps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); } static void -emit_add (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) +emit_add( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) { - sse_addps (func, - make_xmm (xmm_dst), - make_xmm (xmm_src)); + sse_addps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); } -static GLfloat g_cos_storage[4 + 3]; +static void +emit_push_abcd( + struct x86_function *func ) +{ + x86_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_BX ) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_CX ) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX ) ); +} static void -cos4f (void) +emit_pop_dcba( + struct x86_function *func ) { - GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_cos_storage); + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_CX ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_BX ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} + +static void +emit_func_call1( + struct x86_function *func, + unsigned xmm_dst, + unsigned storage, + unsigned char *code ) +{ + x86_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + x86_mov_reg_imm( + func, + x86_make_reg( file_REG32, reg_AX ), + storage ); + sse_movaps( + func, + x86_deref( x86_make_reg( file_REG32, reg_AX ) ), + make_xmm( xmm_dst ) ); + emit_push_abcd( + func ); + x86_call( + func, + code ); + emit_pop_dcba( + func ); + sse_movaps( + func, + make_xmm( xmm_dst ), + x86_deref( x86_make_reg( file_REG32, reg_AX ) ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} + +static void +emit_func_call2( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src, + unsigned storage, + unsigned char *code ) +{ + x86_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + x86_mov_reg_imm( + func, + x86_make_reg( file_REG32, reg_AX ), + storage ); + sse_movaps( + func, + x86_deref( x86_make_reg( file_REG32, reg_AX ) ), + make_xmm( xmm_dst ) ); + sse_movaps( + func, + x86_make_disp( x86_make_reg( file_REG32, reg_AX ), 16 ), + make_xmm( xmm_src ) ); + emit_push_abcd( + func ); + x86_call( + func, + code ); + emit_pop_dcba( + func ); + sse_movaps( + func, + make_xmm( xmm_dst ), + x86_deref( x86_make_reg( file_REG32, reg_AX ) ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} + +/* XXX: move into machine context */ +static float g_cos_storage[4 + 3]; + +static void +cos4f( void ) +{ + float *store = (float *) ALIGN16( (unsigned) g_cos_storage ); #ifdef WIN32 - store[0] = (GLfloat) cos ((GLdouble) store[0]); - store[1] = (GLfloat) cos ((GLdouble) store[1]); - store[2] = (GLfloat) cos ((GLdouble) store[2]); - store[3] = (GLfloat) cos ((GLdouble) store[3]); + store[0] = (float) cos( (double) store[0] ); + store[1] = (float) cos( (double) store[1] ); + store[2] = (float) cos( (double) store[2] ); + store[3] = (float) cos( (double) store[3] ); #else - store[0] = cosf (store[0]); - store[1] = cosf (store[1]); - store[2] = cosf (store[2]); - store[3] = cosf (store[3]); + store[0] = cosf( store[0] ); + store[1] = cosf( store[1] ); + store[2] = cosf( store[2] ); + store[3] = cosf( store[3] ); #endif } static void -emit_cos (struct x86_function *func, - unsigned xmm_dst) +emit_cos( + struct x86_function *func, + unsigned xmm_dst ) { - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_mov_reg_imm (func, - x86_make_reg (file_REG32, reg_AX), - ALIGN16((GLint) g_cos_storage)); - sse_movaps (func, - x86_deref (x86_make_reg (file_REG32, reg_AX)), - make_xmm (xmm_dst)); - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_push (func, - x86_make_reg (file_REG32, reg_BX)); - x86_push (func, - x86_make_reg (file_REG32, reg_CX)); - x86_push (func, - x86_make_reg (file_REG32, reg_DX)); - x86_call (func, - (GLubyte *) cos4f); - x86_pop (func, - x86_make_reg (file_REG32, reg_DX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_CX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_BX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); - sse_movaps (func, - make_xmm (xmm_dst), - x86_deref (x86_make_reg (file_REG32, reg_AX))); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); + emit_func_call1( + func, + xmm_dst, + ALIGN16( (unsigned) g_cos_storage ), + (unsigned char *) cos4f ); } -static GLfloat g_sin_storage[4 + 3]; +/* XXX: move into machine context */ +static float g_sin_storage[4 + 3]; static void -sin4f (void) +sin4f( void ) { - GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_sin_storage); + float *store = (float *) ALIGN16( (unsigned) g_sin_storage ); #ifdef WIN32 - store[0] = (GLfloat) sin ((GLdouble) store[0]); - store[1] = (GLfloat) sin ((GLdouble) store[1]); - store[2] = (GLfloat) sin ((GLdouble) store[2]); - store[3] = (GLfloat) sin ((GLdouble) store[3]); + store[0] = (float) sin( (double) store[0] ); + store[1] = (float) sin( (double) store[1] ); + store[2] = (float) sin( (double) store[2] ); + store[3] = (float) sin( (double) store[3] ); #else - store[0] = sin (store[0]); - store[1] = sin (store[1]); - store[2] = sin (store[2]); - store[3] = sin (store[3]); + store[0] = sinf( store[0] ); + store[1] = sinf( store[1] ); + store[2] = sinf( store[2] ); + store[3] = sinf( store[3] ); #endif } @@ -330,47 +440,23 @@ static void emit_sin (struct x86_function *func, unsigned xmm_dst) { - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_mov_reg_imm (func, - x86_make_reg (file_REG32, reg_AX), - ALIGN16((GLint) g_sin_storage)); - sse_movaps (func, - x86_deref (x86_make_reg (file_REG32, reg_AX)), - make_xmm (xmm_dst)); - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_push (func, - x86_make_reg (file_REG32, reg_BX)); - x86_push (func, - x86_make_reg (file_REG32, reg_CX)); - x86_push (func, - x86_make_reg (file_REG32, reg_DX)); - x86_call (func, - (GLubyte *) sin4f); - x86_pop (func, - x86_make_reg (file_REG32, reg_DX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_CX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_BX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); - sse_movaps (func, - make_xmm (xmm_dst), - x86_deref (x86_make_reg (file_REG32, reg_AX))); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); + emit_func_call1( + func, + xmm_dst, + ALIGN16( (unsigned) g_sin_storage ), + (unsigned char *) sin4f ); } static void -emit_mov (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) +emit_mov( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) { - sse_movups (func, - make_xmm (xmm_dst), - make_xmm (xmm_src)); + sse_movups( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); } static void @@ -378,1353 +464,1255 @@ emit_mul (struct x86_function *func, unsigned xmm_dst, unsigned xmm_src) { - sse_mulps (func, - make_xmm (xmm_dst), - make_xmm (xmm_src)); + sse_mulps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); } -static GLfloat g_pow_storage[4 + 4 + 3]; +/* XXX: move into machine context */ +static float g_pow_storage[4 + 4 + 3]; static void -pow4f (void) +pow4f( void ) { - GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_pow_storage); + float *store = (float *) ALIGN16( (unsigned) g_pow_storage ); #ifdef WIN32 - store[0] = (GLfloat) pow ((GLdouble) store[0], (GLdouble) store[4]); - store[1] = (GLfloat) pow ((GLdouble) store[1], (GLdouble) store[5]); - store[2] = (GLfloat) pow ((GLdouble) store[2], (GLdouble) store[6]); - store[3] = (GLfloat) pow ((GLdouble) store[3], (GLdouble) store[7]); + store[0] = (float) pow( (double) store[0], (double) store[4] ); + store[1] = (float) pow( (double) store[1], (double) store[5] ); + store[2] = (float) pow( (double) store[2], (double) store[6] ); + store[3] = (float) pow( (double) store[3], (double) store[7] ); #else - store[0] = powf (store[0], store[4]); - store[1] = powf (store[1], store[5]); - store[2] = powf (store[2], store[6]); - store[3] = powf (store[3], store[7]); + store[0] = powf( store[0], store[4] ); + store[1] = powf( store[1], store[5] ); + store[2] = powf( store[2], store[6] ); + store[3] = powf( store[3], store[7] ); #endif } static void -emit_pow (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) +emit_pow( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) { - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_mov_reg_imm (func, - x86_make_reg (file_REG32, reg_AX), - ALIGN16((GLint) g_pow_storage)); - sse_movaps (func, - x86_make_disp (x86_make_reg (file_REG32, reg_AX), 0), - make_xmm (xmm_dst)); - sse_movaps (func, - x86_make_disp (x86_make_reg (file_REG32, reg_AX), 16), - make_xmm (xmm_src)); - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_push (func, - x86_make_reg (file_REG32, reg_BX)); - x86_push (func, - x86_make_reg (file_REG32, reg_CX)); - x86_push (func, - x86_make_reg (file_REG32, reg_DX)); - x86_call (func, - (GLubyte *) pow4f); - x86_pop (func, - x86_make_reg (file_REG32, reg_DX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_CX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_BX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); - sse_movaps (func, - make_xmm (xmm_dst), - x86_deref (x86_make_reg (file_REG32, reg_AX))); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); + emit_func_call2( + func, + xmm_dst, + xmm_src, + ALIGN16( (unsigned) g_pow_storage ), + (unsigned char *) pow4f ); } -static GLfloat g_ex2_storage[4 + 3]; +/* XXX: move into machine context */ +static float g_ex2_storage[4 + 3]; static void -ex24f (void) +ex24f( void ) { - GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_ex2_storage); + float *store = (float *) ALIGN16( (unsigned) g_ex2_storage ); - store[0] = (GLfloat) pow (2.0, (GLdouble) store[0]); - store[1] = (GLfloat) pow (2.0, (GLdouble) store[1]); - store[2] = (GLfloat) pow (2.0, (GLdouble) store[2]); - store[3] = (GLfloat) pow (2.0, (GLdouble) store[3]); +#ifdef WIN32 + store[0] = (float) pow( 2.0, (double) store[0] ); + store[1] = (float) pow( 2.0, (double) store[1] ); + store[2] = (float) pow( 2.0, (double) store[2] ); + store[3] = (float) pow( 2.0, (double) store[3] ); +#else + store[0] = powf( 2.0f, store[0] ); + store[1] = powf( 2.0f, store[1] ); + store[2] = powf( 2.0f, store[2] ); + store[3] = powf( 2.0f, store[3] ); +#endif } static void -emit_ex2 (struct x86_function *func, - unsigned xmm_dst) +emit_ex2( + struct x86_function *func, + unsigned xmm_dst ) { - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_mov_reg_imm (func, - x86_make_reg (file_REG32, reg_AX), - ALIGN16((GLint) g_ex2_storage)); - sse_movaps (func, - x86_deref (x86_make_reg (file_REG32, reg_AX)), - make_xmm (xmm_dst)); - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_push (func, - x86_make_reg (file_REG32, reg_BX)); - x86_push (func, - x86_make_reg (file_REG32, reg_CX)); - x86_push (func, - x86_make_reg (file_REG32, reg_DX)); - x86_call (func, - (GLubyte *) ex24f); - x86_pop (func, - x86_make_reg (file_REG32, reg_DX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_CX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_BX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); - sse_movaps (func, - make_xmm (xmm_dst), - x86_deref (x86_make_reg (file_REG32, reg_AX))); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); + emit_func_call1( + func, + xmm_dst, + ALIGN16( (unsigned) g_ex2_storage ), + (unsigned char *) ex24f ); } -static GLfloat g_lg2_storage[4 + 3]; +/* XXX: move into machine context */ +static float g_lg2_storage[4 + 3]; static void -lg24f (void) +lg24f( void ) { - GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_lg2_storage); + float *store = (float *) ALIGN16( (unsigned) g_lg2_storage ); - store[0] = LOG2 (store[0]); - store[1] = LOG2 (store[1]); - store[2] = LOG2 (store[2]); - store[3] = LOG2 (store[3]); + store[0] = LOG2( store[0] ); + store[1] = LOG2( store[1] ); + store[2] = LOG2( store[2] ); + store[3] = LOG2( store[3] ); } static void -emit_lg2 (struct x86_function *func, - unsigned xmm_dst) +emit_lg2( + struct x86_function *func, + unsigned xmm_dst ) { - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_mov_reg_imm (func, - x86_make_reg (file_REG32, reg_AX), - ALIGN16((GLint) g_lg2_storage)); - sse_movaps (func, - x86_deref (x86_make_reg (file_REG32, reg_AX)), - make_xmm (xmm_dst)); - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_push (func, - x86_make_reg (file_REG32, reg_BX)); - x86_push (func, - x86_make_reg (file_REG32, reg_CX)); - x86_push (func, - x86_make_reg (file_REG32, reg_DX)); - x86_call (func, - (GLubyte *) lg24f); - x86_pop (func, - x86_make_reg (file_REG32, reg_DX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_CX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_BX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); - sse_movaps (func, - make_xmm (xmm_dst), - x86_deref (x86_make_reg (file_REG32, reg_AX))); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); + emit_func_call1( + func, + xmm_dst, + ALIGN16( (unsigned) g_lg2_storage ), + (unsigned char *) lg24f ); } -static GLfloat g_flr_storage[4 + 3]; +/* XXX: move into machine context */ +static float g_flr_storage[4 + 3]; static void -flr4f (void) +flr4f( void ) { - GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_flr_storage); + float *store = (float *) ALIGN16( (unsigned) g_flr_storage ); - store[0] = (GLfloat) floor ((GLdouble) store[0]); - store[1] = (GLfloat) floor ((GLdouble) store[1]); - store[2] = (GLfloat) floor ((GLdouble) store[2]); - store[3] = (GLfloat) floor ((GLdouble) store[3]); + store[0] = (float) floor( (double) store[0] ); + store[1] = (float) floor( (double) store[1] ); + store[2] = (float) floor( (double) store[2] ); + store[3] = (float) floor( (double) store[3] ); } static void -emit_flr (struct x86_function *func, - unsigned xmm_dst) +emit_flr( + struct x86_function *func, + unsigned xmm_dst ) { - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_mov_reg_imm (func, - x86_make_reg (file_REG32, reg_AX), - ALIGN16((GLint) g_flr_storage)); - sse_movaps (func, - x86_deref (x86_make_reg (file_REG32, reg_AX)), - make_xmm (xmm_dst)); - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_push (func, - x86_make_reg (file_REG32, reg_BX)); - x86_push (func, - x86_make_reg (file_REG32, reg_CX)); - x86_push (func, - x86_make_reg (file_REG32, reg_DX)); - x86_call (func, - (GLubyte *) flr4f); - x86_pop (func, - x86_make_reg (file_REG32, reg_DX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_CX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_BX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); - sse_movaps (func, - make_xmm (xmm_dst), - x86_deref (x86_make_reg (file_REG32, reg_AX))); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); + emit_func_call1( + func, + xmm_dst, + ALIGN16( (unsigned) g_flr_storage ), + (unsigned char *) flr4f ); } -static GLfloat g_frc_storage[4 + 3]; +/* XXX: move into machine context */ +static float g_frc_storage[4 + 3]; static void -frc4f (void) +frc4f( void ) { - GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_frc_storage); + float *store = (float *) ALIGN16( (unsigned) g_frc_storage ); - store[0] -= (GLfloat) floor ((GLdouble) store[0]); - store[1] -= (GLfloat) floor ((GLdouble) store[1]); - store[2] -= (GLfloat) floor ((GLdouble) store[2]); - store[3] -= (GLfloat) floor ((GLdouble) store[3]); + store[0] -= (float) floor( (double) store[0] ); + store[1] -= (float) floor( (double) store[1] ); + store[2] -= (float) floor( (double) store[2] ); + store[3] -= (float) floor( (double) store[3] ); } static void -emit_frc (struct x86_function *func, - unsigned xmm_dst) +emit_frc( + struct x86_function *func, + unsigned xmm_dst ) { - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_mov_reg_imm (func, - x86_make_reg (file_REG32, reg_AX), - ALIGN16((GLint) g_frc_storage)); - sse_movaps (func, - x86_deref (x86_make_reg (file_REG32, reg_AX)), - make_xmm (xmm_dst)); - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_push (func, - x86_make_reg (file_REG32, reg_BX)); - x86_push (func, - x86_make_reg (file_REG32, reg_CX)); - x86_push (func, - x86_make_reg (file_REG32, reg_DX)); - x86_call (func, - (GLubyte *) frc4f); - x86_pop (func, - x86_make_reg (file_REG32, reg_DX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_CX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_BX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); - sse_movaps (func, - make_xmm (xmm_dst), - x86_deref (x86_make_reg (file_REG32, reg_AX))); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); + emit_func_call1( + func, + xmm_dst, + ALIGN16( (unsigned) g_frc_storage ), + (unsigned char *) frc4f ); } static void -emit_rcp (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) +emit_rcp ( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) { - sse2_rcpps (func, - make_xmm (xmm_dst), - make_xmm (xmm_src)); + sse2_rcpps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); } static void -emit_rsqrt (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) +emit_rsqrt( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) { - sse_rsqrtps (func, - make_xmm (xmm_dst), - make_xmm (xmm_src)); + sse_rsqrtps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); } static void -emit_sub (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) +emit_sub( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) { - sse_subps (func, - make_xmm (xmm_dst), - make_xmm (xmm_src)); + sse_subps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); } static void -emit_fetch (struct x86_function *func, - unsigned xmm, - const struct tgsi_full_src_register *reg, - const unsigned chan_index) +emit_fetch( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_src_register *reg, + const unsigned chan_index ) { - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle (reg, chan_index); - - switch (swizzle) - { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch (reg->SrcRegister.File) - { - case TGSI_FILE_CONSTANT: - emit_const (func, xmm, reg->SrcRegister.Index, swizzle); - break; - - case TGSI_FILE_INPUT: - emit_input (func, xmm, reg->SrcRegister.Index, swizzle); - break; - - case TGSI_FILE_TEMPORARY: - emit_tempf (func, xmm, reg->SrcRegister.Index, swizzle); - break; - - default: - assert (0); - } - break; - - case TGSI_EXTSWIZZLE_ZERO: - emit_tempf (func, - xmm, - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C); - break; - - case TGSI_EXTSWIZZLE_ONE: - emit_tempf (func, - xmm, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C); - break; - - default: - assert (0); - } - - switch (tgsi_util_get_full_src_register_sign_mode (reg, chan_index)) - { - case TGSI_UTIL_SIGN_CLEAR: - emit_abs (func, xmm); - break; - - case TGSI_UTIL_SIGN_SET: - emit_setsign (func, xmm); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - emit_neg (func, xmm); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } + unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( reg->SrcRegister.File ) { + case TGSI_FILE_CONSTANT: + emit_const( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_INPUT: + emit_input( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_TEMPORARY: + emit_tempf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + break; + + case TGSI_EXTSWIZZLE_ONE: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + break; + + default: + assert( 0 ); + } + + switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { + case TGSI_UTIL_SIGN_CLEAR: + emit_abs( func, xmm ); + break; + + case TGSI_UTIL_SIGN_SET: + emit_setsign( func, xmm ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + emit_neg( func, xmm ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } } static void -emit_store (struct x86_function *func, - unsigned xmm, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - unsigned chan_index) +emit_store( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index ) { - switch (reg->DstRegister.File) - { - case TGSI_FILE_OUTPUT: - emit_output (func, xmm, reg->DstRegister.Index, chan_index); - break; + switch( reg->DstRegister.File ) { + case TGSI_FILE_OUTPUT: + emit_output( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; - case TGSI_FILE_TEMPORARY: - emit_temps (func, xmm, reg->DstRegister.Index, chan_index); - break; + case TGSI_FILE_TEMPORARY: + emit_temps( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; case TGSI_FILE_ADDRESS: - emit_addrs( func, xmm, reg->DstRegister.Index, chan_index ); + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); break; - default: - assert (0); - } + default: + assert( 0 ); + } - switch (inst->Instruction.Saturate) - { - case TGSI_SAT_NONE: - break; + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; - case TGSI_SAT_ZERO_ONE: -// assert (0); - break; + case TGSI_SAT_ZERO_ONE: +// assert( 0 ); + break; - case TGSI_SAT_MINUS_PLUS_ONE: - assert (0); - break; - } + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } } -#define FETCH(XMM,INDEX,CHAN)\ - emit_fetch (func, XMM, &inst->FullSrcRegisters[INDEX], CHAN) - -#define STORE(XMM,INDEX,CHAN)\ - emit_store (func, XMM, &inst->FullDstRegisters[INDEX], inst, CHAN) - static void -emit_kil (struct x86_function *func, - const struct tgsi_full_src_register *reg) +emit_kil( + struct x86_function *func, + const struct tgsi_full_src_register *reg ) { - unsigned uniquemask; - unsigned registers[4]; - unsigned nextregister = 0; - unsigned firstchan = ~0; - unsigned chan_index; - - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); - - for (chan_index = 0; chan_index < 4; chan_index++) - { - unsigned swizzle; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle (reg, chan_index); - - /* check if the component has not been already tested */ - if (!(uniquemask & (1 << swizzle))) - { - uniquemask |= 1 << swizzle; - - /* allocate register */ - registers[chan_index] = nextregister; - emit_fetch (func, nextregister, reg, chan_index); - nextregister++; - - /* mark the first channel used */ - if (firstchan == ~0) - firstchan = chan_index; - } - } - - x86_push (func, - x86_make_reg (file_REG32, reg_AX)); - x86_push (func, - x86_make_reg (file_REG32, reg_DX)); - - for (chan_index = 0; chan_index < 4; chan_index++) - { - if (uniquemask & (1 << chan_index)) - { - sse_cmpps (func, - make_xmm (registers[chan_index]), - get_temp (TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C), - cc_LessThan); - - if (chan_index == firstchan) - { - sse_pmovmskb (func, - x86_make_reg (file_REG32, reg_AX), - make_xmm (registers[chan_index])); - } - else - { - sse_pmovmskb (func, - x86_make_reg (file_REG32, reg_DX), - make_xmm (registers[chan_index])); - x86_or (func, - x86_make_reg (file_REG32, reg_AX), - x86_make_reg (file_REG32, reg_DX)); - } - } - } - - x86_or (func, - get_temp(TGSI_EXEC_TEMP_KILMASK_I, - TGSI_EXEC_TEMP_KILMASK_C), - x86_make_reg (file_REG32, reg_AX)); - - x86_pop (func, - x86_make_reg (file_REG32, reg_DX)); - x86_pop (func, - x86_make_reg (file_REG32, reg_AX)); -} + unsigned uniquemask; + unsigned registers[4]; + unsigned nextregister = 0; + unsigned firstchan = ~0; + unsigned chan_index; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + FOR_EACH_CHANNEL( chan_index ) { + unsigned swizzle; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle( + reg, + chan_index ); + + /* check if the component has not been already tested */ + if( !(uniquemask & (1 << swizzle)) ) { + uniquemask |= 1 << swizzle; + + /* allocate register */ + registers[chan_index] = nextregister; + emit_fetch( + func, + nextregister, + reg, + chan_index ); + nextregister++; + + /* mark the first channel used */ + if( firstchan == ~0 ) { + firstchan = chan_index; + } + } + } -static void -emit_setcc (struct x86_function *func, - struct tgsi_full_instruction *inst, - enum sse_cc cc) -{ - unsigned chan_index; + x86_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX ) ); + + FOR_EACH_CHANNEL( chan_index ) { + if( uniquemask & (1 << chan_index) ) { + sse_cmpps( + func, + make_xmm( registers[chan_index] ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + + if( chan_index == firstchan ) { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_AX ), + make_xmm( registers[chan_index] ) ); + } + else { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_DX ), + make_xmm( registers[chan_index] ) ); + x86_or( + func, + x86_make_reg( file_REG32, reg_AX ), + x86_make_reg( file_REG32, reg_DX ) ); + } + } + } + + x86_or( + func, + get_temp( + TGSI_EXEC_TEMP_KILMASK_I, + TGSI_EXEC_TEMP_KILMASK_C ), + x86_make_reg( file_REG32, reg_AX ) ); - FOR_EACH_ENABLED_CHANNEL - { - emit_fetch (func, 0, &inst->FullSrcRegisters[0], chan_index); - emit_fetch (func, 1, &inst->FullSrcRegisters[1], chan_index); + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} - sse_cmpps (func, - make_xmm (0), - make_xmm (1), - cc); +#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ + emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) - sse_andps (func, - make_xmm (0), - get_temp (TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C)); +#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ + emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) - emit_store (func, 0, &inst->FullDstRegisters[0], inst, chan_index); - } +static void +emit_setcc( + struct x86_function *func, + struct tgsi_full_instruction *inst, + enum sse_cc cc ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + make_xmm( 1 ), + cc ); + sse_andps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ) ); + STORE( func, *inst, 0, 0, chan_index ); + } } static void -emit_cmp (struct x86_function *func, - struct tgsi_full_instruction *inst) +emit_cmp( + struct x86_function *func, + struct tgsi_full_instruction *inst ) { - unsigned chan_index; - - FOR_EACH_ENABLED_CHANNEL - { - emit_fetch (func, 0, &inst->FullSrcRegisters[0], chan_index); - emit_fetch (func, 1, &inst->FullSrcRegisters[1], chan_index); - emit_fetch (func, 2, &inst->FullSrcRegisters[2], chan_index); - - sse_cmpps (func, - make_xmm (0), - get_temp (TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C), - cc_LessThan); - - sse_andps (func, - make_xmm (1), - make_xmm (0)); - sse_andnps (func, - make_xmm (0), - make_xmm (2)); - sse_orps (func, - make_xmm (0), - make_xmm (1)); - - emit_store (func, 0, &inst->FullDstRegisters[0], inst, chan_index); - } + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + sse_andps( + func, + make_xmm( 1 ), + make_xmm( 0 ) ); + sse_andnps( + func, + make_xmm( 0 ), + make_xmm( 2 ) ); + sse_orps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } } static void -emit_f2it( struct x86_function *func, - unsigned xmm ) +emit_f2it( + struct x86_function *func, + unsigned xmm ) { - sse2_cvttps2dq( func, make_xmm( xmm ), make_xmm( xmm ) ); + sse2_cvttps2dq( + func, + make_xmm( xmm ), + make_xmm( xmm ) ); } static void -emit_instruction (struct x86_function *func, - struct tgsi_full_instruction *inst) +emit_instruction( + struct x86_function *func, + struct tgsi_full_instruction *inst ) { - unsigned chan_index; + unsigned chan_index; - switch (inst->Instruction.Opcode) { + switch( inst->Instruction.Opcode ) { case TGSI_OPCODE_ARL: - FOR_EACH_ENABLED_CHANNEL { - FETCH( 0, 0, chan_index ); - emit_f2it( func, 0 ); - STORE( 0, 0, chan_index ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_f2it( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + /* TGSI_OPCODE_SWZ */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C); + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + STORE( func, *inst, 0, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( func, *inst, 0, 0, CHAN_W ); + } + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_maxps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 1, 0, CHAN_Y ); + sse_maxps( + func, + make_xmm( 1 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + FETCH( func, *inst, 2, 0, CHAN_W ); + sse_minps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_128_I, + TGSI_EXEC_TEMP_128_C ) ); + sse_maxps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_MINUS_128_I, + TGSI_EXEC_TEMP_MINUS_128_C ) ); + emit_pow( func, 1, 2 ); + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_xorps( + func, + make_xmm( 2 ), + make_xmm( 2 ) ); + sse_cmpps( + func, + make_xmm( 2 ), + make_xmm( 0 ), + cc_LessThanEqual ); + sse_andps( + func, + make_xmm( 2 ), + make_xmm( 1 ) ); + STORE( func, *inst, 2, 0, CHAN_Z ); + } + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rcp( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rsqrt( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + assert( 0 ); + break; + + case TGSI_OPCODE_LOG: + assert( 0 ); + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_add( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul(func, 1, 2 ); + emit_add(func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_W ); + FETCH( func, *inst, 2, 1, CHAN_W ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 1, 1, CHAN_Y ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + FETCH( func, *inst, 0, 0, CHAN_Z ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, 1, CHAN_W ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_minps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_maxps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + emit_setcc( func, inst, cc_LessThan ); + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + emit_setcc( func, inst, cc_NotLessThan ); + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_sub( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_sub( func, 1, 2 ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CND: + assert( 0 ); + break; + + case TGSI_OPCODE_CND0: + assert( 0 ); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert( 0 ); + break; + + case TGSI_OPCODE_INDEX: + assert( 0 ); + break; + + case TGSI_OPCODE_NEGATE: + assert( 0 ); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_frc( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert( 0 ); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_flr( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + assert( 0 ); + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_ex2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_lg2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_pow( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 1, 1, CHAN_Z ); + FETCH( func, *inst, 3, 0, CHAN_Z ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 4, 1, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_mov( func, 2, 0 ); + emit_mul( func, 2, 1 ); + emit_mov( func, 5, 3 ); + emit_mul( func, 5, 4 ); + emit_sub( func, 2, 5 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 2, 1, CHAN_X ); + FETCH( func, *inst, 5, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + emit_mul( func, 3, 2 ); + emit_mul( func, 1, 5 ); + emit_sub( func, 3, 1 ); + STORE( func, *inst, 3, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_mul( func, 5, 4 ); + emit_mul( func, 0, 2 ); + emit_sub( func, 5, 0 ); + STORE( func, *inst, 5, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert( 0 ); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_abs( func, 0) ; + + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RCC: + assert( 0 ); + break; + + case TGSI_OPCODE_DPH: + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 1, CHAN_W ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + assert( 0 ); + break; + + case TGSI_OPCODE_DDY: + assert( 0 ); + break; + + case TGSI_OPCODE_KIL: + emit_kil( func, &inst->FullSrcRegisters[0] ); + break; + + case TGSI_OPCODE_PK2H: + assert( 0 ); + break; + + case TGSI_OPCODE_PK2US: + assert( 0 ); + break; + + case TGSI_OPCODE_PK4B: + assert( 0 ); + break; + + case TGSI_OPCODE_PK4UB: + assert( 0 ); + break; + + case TGSI_OPCODE_RFL: + assert( 0 ); + break; + + case TGSI_OPCODE_SEQ: + assert( 0 ); + break; + + case TGSI_OPCODE_SFL: + assert( 0 ); + break; + + case TGSI_OPCODE_SGT: + assert( 0 ); + break; + + case TGSI_OPCODE_SIN: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + assert( 0 ); + break; + + case TGSI_OPCODE_SNE: + assert( 0 ); + break; + + case TGSI_OPCODE_STR: + assert( 0 ); + break; + + case TGSI_OPCODE_TEX: + assert( 0 ); + break; + + case TGSI_OPCODE_TXD: + assert( 0 ); + break; + + case TGSI_OPCODE_UP2H: + assert( 0 ); + break; + + case TGSI_OPCODE_UP2US: + assert( 0 ); + break; + + case TGSI_OPCODE_UP4B: + assert( 0 ); break; - case TGSI_OPCODE_MOV: - /* TGSI_OPCODE_SWZ */ - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED(CHAN_X) || IS_CHANNEL_ENABLED(CHAN_W)) - { - emit_tempf (func, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C); - if (IS_CHANNEL_ENABLED(CHAN_X)) - STORE(0, 0, CHAN_X); - if (IS_CHANNEL_ENABLED(CHAN_W)) - STORE(0, 0, CHAN_W); - } - if (IS_CHANNEL_ENABLED(CHAN_Y) || IS_CHANNEL_ENABLED(CHAN_Z)) - { - if (IS_CHANNEL_ENABLED(CHAN_Y)) - { - FETCH(0, 0, CHAN_X); - sse_maxps (func, - make_xmm (0), - get_temp (TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C)); - STORE(0, 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(CHAN_Z)) - { - FETCH(1, 0, CHAN_Y); - sse_maxps (func, - make_xmm (1), - get_temp (TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C)); - - FETCH(2, 0, CHAN_W); - sse_minps (func, - make_xmm (2), - get_temp (TGSI_EXEC_TEMP_128_I, - TGSI_EXEC_TEMP_128_C)); - sse_maxps (func, - make_xmm (2), - get_temp (TGSI_EXEC_TEMP_MINUS_128_I, - TGSI_EXEC_TEMP_MINUS_128_C)); - - emit_pow (func, 1, 2); - - FETCH(0, 0, CHAN_X); - sse_xorps (func, make_xmm (2), make_xmm (2)); - sse_cmpps (func, - make_xmm (2), - make_xmm (0), - cc_LessThanEqual); - sse_andps (func, - make_xmm (2), - make_xmm (1)); - - emit_store (func, 2, &inst->FullDstRegisters[0], inst, CHAN_Z); - } - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH(0, 0, CHAN_X); - emit_rcp (func, 0, 0); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH(0, 0, CHAN_X); - emit_rsqrt (func, 0, 0); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_EXP: - assert (0); - break; - - case TGSI_OPCODE_LOG: - assert (0); - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - FETCH(1, 1, chan_index); - emit_mul (func, 0, 1); - - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - FETCH(1, 1, chan_index); - emit_add (func, 0, 1); - - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH(0, 0, CHAN_X); - FETCH(1, 1, CHAN_X); - emit_mul (func, 0, 1); - - FETCH(1, 0, CHAN_Y); - FETCH(2, 1, CHAN_Y); - emit_mul (func, 1, 2); - emit_add (func, 0, 1); - - FETCH(1, 0, CHAN_Z); - FETCH(2, 1, CHAN_Z); - emit_mul (func, 1, 2); - emit_add (func, 0, 1); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(0, 0, CHAN_X); - FETCH(1, 1, CHAN_X); - emit_mul (func, 0, 1); - - FETCH(1, 0, CHAN_Y); - FETCH(2, 1, CHAN_Y); - emit_mul (func, 1, 2); - emit_add (func, 0, 1); - - FETCH(1, 0, CHAN_Z); - FETCH(2, 1, CHAN_Z); - emit_mul (func, 1, 2); - emit_add (func, 0, 1); - - FETCH(1, 0, CHAN_W); - FETCH(2, 1, CHAN_W); - emit_mul (func, 1, 2); - emit_add (func, 0, 1); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_DST: - IF_IS_CHANNEL_ENABLED(CHAN_X) - { - emit_tempf (func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C); - STORE(0, 0, CHAN_X); - } - IF_IS_CHANNEL_ENABLED(CHAN_Y) - { - FETCH(0, 0, CHAN_Y); - FETCH(1, 1, CHAN_Y); - emit_mul (func, 0, 1); - STORE(0, 0, CHAN_Y); - } - IF_IS_CHANNEL_ENABLED(CHAN_Z) - { - FETCH(0, 0, CHAN_Z); - STORE(0, 0, CHAN_Z); - } - IF_IS_CHANNEL_ENABLED(CHAN_W) - { - FETCH(0, 1, CHAN_W); - STORE(0, 0, CHAN_W); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - FETCH(1, 1, chan_index); - sse_minps (func, - make_xmm (0), - make_xmm (1)); - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - FETCH(1, 1, chan_index); - sse_maxps (func, - make_xmm (0), - make_xmm (1)); - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - emit_setcc (func, inst, cc_LessThan); - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - emit_setcc (func, inst, cc_NotLessThan); - break; - - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - FETCH(1, 1, chan_index); - FETCH(2, 2, chan_index); - emit_mul (func, 0, 1); - emit_add (func, 0, 2); - - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - FETCH(1, 1, chan_index); - emit_sub (func, 0, 1); - - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - FETCH(1, 1, chan_index); - FETCH(2, 2, chan_index); - emit_sub (func, 1, 2); - emit_mul (func, 0, 1); - emit_add (func, 0, 2); - - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_CND: - assert (0); - break; - - case TGSI_OPCODE_CND0: - assert (0); - break; - - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ - assert (0); - break; - - case TGSI_OPCODE_INDEX: - assert (0); - break; - - case TGSI_OPCODE_NEGATE: - assert (0); - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - emit_frc (func, 0); - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_CLAMP: - assert (0); - break; - - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - emit_flr (func, 0); - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_ROUND: - assert (0); - break; - - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ - FETCH(0, 0, CHAN_X); - emit_ex2 (func, 0); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ - FETCH(0, 0, CHAN_X); - emit_lg2 (func, 0); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ - FETCH(0, 0, CHAN_X); - FETCH(1, 1, CHAN_X); - emit_pow (func, 0, 1); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ - if (IS_CHANNEL_ENABLED(CHAN_X) || IS_CHANNEL_ENABLED(CHAN_Y)) - { - FETCH(1, 1, CHAN_Z); - FETCH(3, 0, CHAN_Z); - } - if (IS_CHANNEL_ENABLED(CHAN_X) || IS_CHANNEL_ENABLED(CHAN_Z)) - { - FETCH(0, 0, CHAN_Y); - FETCH(4, 1, CHAN_Y); - } - IF_IS_CHANNEL_ENABLED(CHAN_X) - { - emit_mov (func, 2, 0); - emit_mul (func, 2, 1); - emit_mov (func, 5, 3); - emit_mul (func, 5, 4); - emit_sub (func, 2, 5); - STORE(2, 0, CHAN_X); - } - - if (IS_CHANNEL_ENABLED(CHAN_Y) || IS_CHANNEL_ENABLED(CHAN_Z)) - { - FETCH(2, 1, CHAN_X); - FETCH(5, 0, CHAN_X); - } - IF_IS_CHANNEL_ENABLED(CHAN_Y) - { - emit_mul (func, 3, 2); - emit_mul (func, 1, 5); - emit_sub (func, 3, 1); - STORE(3, 0, CHAN_Y); - } - - IF_IS_CHANNEL_ENABLED(CHAN_Z) - { - emit_mul (func, 5, 4); - emit_mul (func, 0, 2); - emit_sub (func, 5, 0); - STORE(5, 0, CHAN_Z); - } - - IF_IS_CHANNEL_ENABLED(CHAN_W) - { - FETCH(0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C); - STORE(0, 0, CHAN_W); - } - break; - - case TGSI_OPCODE_MULTIPLYMATRIX: - assert (0); - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL - { - FETCH(0, 0, chan_index); - emit_abs (func, 0); - - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_RCC: - assert (0); - break; - - case TGSI_OPCODE_DPH: - FETCH(0, 0, CHAN_X); - FETCH(1, 1, CHAN_X); - emit_mul (func, 0, 1); - - FETCH(1, 0, CHAN_Y); - FETCH(2, 1, CHAN_Y); - emit_mul (func, 1, 2); - emit_add (func, 0, 1); - - FETCH(1, 0, CHAN_Z); - FETCH(2, 1, CHAN_Z); - emit_mul (func, 1, 2); - emit_add (func, 0, 1); - - FETCH(1, 1, CHAN_W); - emit_add (func, 0, 1); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_COS: - FETCH(0, 0, CHAN_X); - emit_cos (func, 0); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_DDX: - assert (0); - break; - - case TGSI_OPCODE_DDY: - assert (0); - break; - - case TGSI_OPCODE_KIL: - emit_kil (func, &inst->FullSrcRegisters[0]); - break; - - case TGSI_OPCODE_PK2H: - assert (0); - break; - - case TGSI_OPCODE_PK2US: - assert (0); - break; - - case TGSI_OPCODE_PK4B: - assert (0); - break; - - case TGSI_OPCODE_PK4UB: - assert (0); - break; - - case TGSI_OPCODE_RFL: - assert (0); - break; - - case TGSI_OPCODE_SEQ: - assert (0); - break; - - case TGSI_OPCODE_SFL: - assert (0); - break; - - case TGSI_OPCODE_SGT: - assert (0); - break; - - case TGSI_OPCODE_SIN: - FETCH(0, 0, CHAN_X); - emit_sin (func, 0); - - FOR_EACH_ENABLED_CHANNEL - { - STORE(0, 0, chan_index); - } - break; - - case TGSI_OPCODE_SLE: - assert (0); - break; - - case TGSI_OPCODE_SNE: - assert (0); - break; - - case TGSI_OPCODE_STR: - assert (0); - break; - - case TGSI_OPCODE_TEX: - assert (0); - break; - - case TGSI_OPCODE_TXD: - assert (0); - break; - - case TGSI_OPCODE_UP2H: - assert (0); - break; - - case TGSI_OPCODE_UP2US: - assert (0); - break; - - case TGSI_OPCODE_UP4B: - assert (0); - break; - - case TGSI_OPCODE_UP4UB: - assert (0); - break; - - case TGSI_OPCODE_X2D: - assert (0); - break; - - case TGSI_OPCODE_ARA: - assert (0); - break; - - case TGSI_OPCODE_ARR: - assert (0); - break; - - case TGSI_OPCODE_BRA: - assert (0); - break; - - case TGSI_OPCODE_CAL: - assert (0); - break; - - case TGSI_OPCODE_RET: - assert (0); - break; - - case TGSI_OPCODE_SSG: - assert (0); - break; - - case TGSI_OPCODE_CMP: - emit_cmp (func, inst); - break; - - case TGSI_OPCODE_SCS: - IF_IS_CHANNEL_ENABLED(CHAN_X) - { - FETCH(0, 0, CHAN_X); - emit_cos (func, 0); - STORE(0, 0, CHAN_X); - } - - IF_IS_CHANNEL_ENABLED(CHAN_Y) - { - FETCH(0, 0, CHAN_Y); - emit_sin (func, 0); - STORE(0, 0, CHAN_Y); - } - - IF_IS_CHANNEL_ENABLED(CHAN_Z) - { - FETCH(0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C); - STORE(0, 0, CHAN_Z); - } - - IF_IS_CHANNEL_ENABLED(CHAN_W) - { - FETCH(0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C); - STORE(0, 0, CHAN_W); - } - break; - - case TGSI_OPCODE_TXB: - assert (0); - break; - - case TGSI_OPCODE_NRM: - assert (0); - break; - - case TGSI_OPCODE_DIV: - assert (0); - break; - - case TGSI_OPCODE_DP2: - assert (0); - break; - - case TGSI_OPCODE_TXL: - assert (0); - break; - - case TGSI_OPCODE_BRK: - assert (0); - break; - - case TGSI_OPCODE_IF: - assert (0); - break; - - case TGSI_OPCODE_LOOP: - assert (0); - break; - - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ELSE: - assert (0); - break; - - case TGSI_OPCODE_ENDIF: - assert (0); - break; - - case TGSI_OPCODE_ENDLOOP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - - case TGSI_OPCODE_PUSHA: - assert (0); - break; - - case TGSI_OPCODE_POPA: - assert (0); - break; - - case TGSI_OPCODE_CEIL: - assert (0); - break; - - case TGSI_OPCODE_I2F: - assert (0); - break; - - case TGSI_OPCODE_NOT: - assert (0); - break; - - case TGSI_OPCODE_TRUNC: - assert (0); - break; - - case TGSI_OPCODE_SHL: - assert (0); - break; - - case TGSI_OPCODE_SHR: - assert (0); - break; - - case TGSI_OPCODE_AND: - assert (0); - break; - - case TGSI_OPCODE_OR: - assert (0); - break; - - case TGSI_OPCODE_MOD: - assert (0); - break; - - case TGSI_OPCODE_XOR: - assert (0); - break; + case TGSI_OPCODE_UP4UB: + assert( 0 ); + break; - case TGSI_OPCODE_SAD: - assert (0); - break; + case TGSI_OPCODE_X2D: + assert( 0 ); + break; - case TGSI_OPCODE_TXF: - assert (0); - break; + case TGSI_OPCODE_ARA: + assert( 0 ); + break; - case TGSI_OPCODE_TXQ: - assert (0); - break; + case TGSI_OPCODE_ARR: + assert( 0 ); + break; - case TGSI_OPCODE_CONT: - assert (0); - break; + case TGSI_OPCODE_BRA: + assert( 0 ); + break; - case TGSI_OPCODE_EMIT: - assert (0); - break; + case TGSI_OPCODE_CAL: + assert( 0 ); + break; - case TGSI_OPCODE_ENDPRIM: - assert (0); - break; + case TGSI_OPCODE_RET: + assert( 0 ); + break; - default: - assert (0); - } + case TGSI_OPCODE_SSG: + assert( 0 ); + break; + + case TGSI_OPCODE_CMP: + emit_cmp (func, inst); + break; + + case TGSI_OPCODE_SCS: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + emit_sin( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + FETCH( func, *inst, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_TXB: + assert( 0 ); + break; + + case TGSI_OPCODE_NRM: + assert( 0 ); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + assert( 0 ); + break; + + case TGSI_OPCODE_TXL: + assert( 0 ); + break; + + case TGSI_OPCODE_BRK: + assert( 0 ); + break; + + case TGSI_OPCODE_IF: + assert( 0 ); + break; + + case TGSI_OPCODE_LOOP: + assert( 0 ); + break; + + case TGSI_OPCODE_REP: + assert( 0 ); + break; + + case TGSI_OPCODE_ELSE: + assert( 0 ); + break; + + case TGSI_OPCODE_ENDIF: + assert( 0 ); + break; + + case TGSI_OPCODE_ENDLOOP: + assert( 0 ); + break; + + case TGSI_OPCODE_ENDREP: + assert( 0 ); + break; + + case TGSI_OPCODE_PUSHA: + assert( 0 ); + break; + + case TGSI_OPCODE_POPA: + assert( 0 ); + break; + + case TGSI_OPCODE_CEIL: + assert( 0 ); + break; + + case TGSI_OPCODE_I2F: + assert( 0 ); + break; + + case TGSI_OPCODE_NOT: + assert( 0 ); + break; + + case TGSI_OPCODE_TRUNC: + assert( 0 ); + break; + + case TGSI_OPCODE_SHL: + assert( 0 ); + break; + + case TGSI_OPCODE_SHR: + assert( 0 ); + break; + + case TGSI_OPCODE_AND: + assert( 0 ); + break; + + case TGSI_OPCODE_OR: + assert( 0 ); + break; + + case TGSI_OPCODE_MOD: + assert( 0 ); + break; + + case TGSI_OPCODE_XOR: + assert( 0 ); + break; + + case TGSI_OPCODE_SAD: + assert( 0 ); + break; + + case TGSI_OPCODE_TXF: + assert( 0 ); + break; + + case TGSI_OPCODE_TXQ: + assert( 0 ); + break; + + case TGSI_OPCODE_CONT: + assert( 0 ); + break; + + case TGSI_OPCODE_EMIT: + assert( 0 ); + break; + + case TGSI_OPCODE_ENDPRIM: + assert( 0 ); + break; + + default: + assert( 0 ); + } } -GLboolean -tgsi_emit_sse (struct tgsi_token *tokens, - struct x86_function *function) +unsigned +tgsi_emit_sse( + struct tgsi_token *tokens, + struct x86_function *func ) { - struct tgsi_parse_context parse; + struct tgsi_parse_context parse; - x86_init_func (function); + x86_init_func( func ); - x86_mov (function, get_input_base (), get_argument (0)); - x86_mov (function, get_output_base (), get_argument (1)); - x86_mov (function, get_const_base (), get_argument (2)); - x86_mov (function, get_temp_base (), get_argument (3)); + x86_mov( + func, + get_input_base(), + get_argument( 0 ) ); + x86_mov( + func, + get_output_base(), + get_argument( 1 ) ); + x86_mov( + func, + get_const_base(), + get_argument( 2 ) ); + x86_mov( + func, + get_temp_base(), + get_argument( 3 ) ); - tgsi_parse_init (&parse, tokens); + tgsi_parse_init( &parse, tokens ); - while (!tgsi_parse_end_of_tokens (&parse)) - { - tgsi_parse_token (&parse); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); - switch (parse.FullToken.Token.Type) - { - case TGSI_TOKEN_TYPE_DECLARATION: - break; + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - emit_instruction (function, &parse.FullToken.FullInstruction); - break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + emit_instruction( + func, + &parse.FullToken.FullInstruction ); + break; - default: - assert (0); - } - } + default: + assert( 0 ); + } + } - tgsi_parse_free (&parse); + tgsi_parse_free( &parse ); #ifdef WIN32 - x86_retw (function, 16); + x86_retw( func, 16 ); #else - x86_ret (function); + x86_ret( func ); #endif - return GL_FALSE; + return 1; } #endif diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c index 3ea37bb5e7..f5b0ccdb9b 100644 --- a/src/mesa/x86/rtasm/x86sse.c +++ b/src/mesa/x86/rtasm/x86sse.c @@ -502,6 +502,14 @@ void sse_addss( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse_andnps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x55); + emit_modrm( p, dst, src ); +} + void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) @@ -510,6 +518,13 @@ void sse_andps( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse_rsqrtps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); +} void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, @@ -538,6 +553,21 @@ void sse_movlhps( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse_orps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x56); + emit_modrm( p, dst, src ); +} + +void sse_xorps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x57); + emit_modrm( p, dst, src ); +} void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, @@ -576,6 +606,14 @@ void sse_cmpps( struct x86_function *p, emit_1ub(p, cc); } +void sse_pmovmskb( struct x86_function *p, + struct x86_reg dest, + struct x86_reg src) +{ + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dest, src); +} + /*********************************************************************** * SSE2 instructions */ @@ -593,6 +631,14 @@ void sse2_pshufd( struct x86_function *p, emit_1ub(p, shuf); } +void sse2_cvttps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); + emit_modrm( p, dst, src ); +} + void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) @@ -625,6 +671,14 @@ void sse2_packuswb( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse2_rcpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) diff --git a/src/mesa/x86/rtasm/x86sse.h b/src/mesa/x86/rtasm/x86sse.h index 66fb852ac9..c6236395b2 100644 --- a/src/mesa/x86/rtasm/x86sse.h +++ b/src/mesa/x86/rtasm/x86sse.h @@ -142,17 +142,20 @@ void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg sr void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf ); +void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, GLubyte cc ); void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); @@ -167,9 +170,13 @@ void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf ); +void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |