diff options
-rwxr-xr-x | src/mesa/pipe/tgsi/exec/tgsi_sse2.c | 484 |
1 files changed, 264 insertions, 220 deletions
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c index b8edcf0a2e..abdebd6f97 100755 --- a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c +++ b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c @@ -22,14 +22,9 @@ #define TEMP_R0 TGSI_EXEC_TEMP_R0 -static struct x86_reg -get_argument( - unsigned index ) -{ - return x86_make_disp( - x86_make_reg( file_REG32, reg_SP ), - (index + 1) * 4 ); -} +/** + * X86 utility functions. + */ static struct x86_reg make_xmm( @@ -40,6 +35,10 @@ make_xmm( (enum x86_reg_name) xmm ); } +/** + * X86 register mapping helpers. + */ + static struct x86_reg get_const_base( void ) { @@ -49,16 +48,6 @@ get_const_base( void ) } static struct x86_reg -get_const( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_const_base(), - (vec * 4 + chan) * 4 ); -} - -static struct x86_reg get_input_base( void ) { return x86_make_reg( @@ -67,55 +56,78 @@ get_input_base( void ) } static struct x86_reg -get_input( - unsigned vec, - unsigned chan ) +get_output_base( void ) { - return x86_make_disp( - get_input_base(), - (vec * 4 + chan) * 16 ); + return x86_make_reg( + file_REG32, + reg_DX ); } static struct x86_reg -get_output_base( void ) +get_temp_base( void ) { return x86_make_reg( file_REG32, - reg_DX ); + reg_BX ); } static struct x86_reg -get_output( +get_coef_base( void ) +{ + return get_output_base(); +} + +/** + * Data access helpers. 
+ */ + +static struct x86_reg +get_argument( + unsigned index ) +{ + return x86_make_disp( + x86_make_reg( file_REG32, reg_SP ), + (index + 1) * 4 ); +} + +static struct x86_reg +get_const( unsigned vec, unsigned chan ) { return x86_make_disp( - get_output_base(), - (vec * 4 + chan) * 16 ); + get_const_base(), + (vec * 4 + chan) * 4 ); } static struct x86_reg -get_temp_base( void ) +get_input( + unsigned vec, + unsigned chan ) { - return x86_make_reg( - file_REG32, - reg_BX ); + return x86_make_disp( + get_input_base(), + (vec * 4 + chan) * 16 ); } static struct x86_reg -get_temp( +get_output( unsigned vec, unsigned chan ) { return x86_make_disp( - get_temp_base(), + get_output_base(), (vec * 4 + chan) * 16 ); } static struct x86_reg -get_coef_base( void ) +get_temp( + unsigned vec, + unsigned chan ) { - return get_output_base(); + return x86_make_disp( + get_temp_base(), + (vec * 4 + chan) * 16 ); } static struct x86_reg @@ -129,6 +141,10 @@ get_coef( ((vec * 3 + member) * 4 + chan) * 4 ); } +/** + * Data fetch helpers. + */ + static void emit_const( struct x86_function *func, @@ -161,19 +177,6 @@ emit_inputf( } static void -emit_inputs( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - get_input( vec, chan ), - make_xmm( xmm ) ); -} - -static void emit_output( struct x86_function *func, unsigned xmm, @@ -200,19 +203,6 @@ emit_tempf( } static void -emit_temps( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movaps( - func, - get_temp( vec, chan ), - make_xmm( xmm ) ); -} - -static void emit_coef( struct x86_function *func, unsigned xmm, @@ -231,49 +221,34 @@ emit_coef( SHUF( 0, 0, 0, 0 ) ); } -static void -emit_coef_a0( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 0 ); -} +/** + * Data store helpers. 
+ */ static void -emit_coef_dadx( +emit_inputs( struct x86_function *func, unsigned xmm, unsigned vec, unsigned chan ) { - emit_coef( + sse_movups( func, - xmm, - vec, - chan, - 1 ); + get_input( vec, chan ), + make_xmm( xmm ) ); } static void -emit_coef_dady( +emit_temps( struct x86_function *func, unsigned xmm, unsigned vec, unsigned chan ) { - emit_coef( + sse_movaps( func, - xmm, - vec, - chan, - 2 ); + get_temp( vec, chan ), + make_xmm( xmm ) ); } static void @@ -290,57 +265,59 @@ emit_addrs( chan ); } -static void -emit_abs( - struct x86_function *func, - unsigned xmm ) -{ - sse_andps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_7FFFFFFF_I, - TGSI_EXEC_TEMP_7FFFFFFF_C ) ); -} +/** + * Coefficient fetch helpers. + */ static void -emit_neg( +emit_coef_a0( struct x86_function *func, - unsigned xmm ) + unsigned xmm, + unsigned vec, + unsigned chan ) { - sse_xorps( + emit_coef( func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C ) ); + xmm, + vec, + chan, + 0 ); } static void -emit_setsign( +emit_coef_dadx( struct x86_function *func, - unsigned xmm ) + unsigned xmm, + unsigned vec, + unsigned chan ) { - sse_orps( + emit_coef( func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C ) ); + xmm, + vec, + chan, + 1 ); } static void -emit_add( +emit_coef_dady( struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) + unsigned xmm, + unsigned vec, + unsigned chan ) { - sse_addps( + emit_coef( func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); + xmm, + vec, + chan, + 2 ); } +/** + * Function call helpers. + */ + static void emit_push_gp( struct x86_function *func ) @@ -433,6 +410,35 @@ emit_func_call_dst_src( code ); } +/** + * Low-level instruction translators. 
+ */ + +static void +emit_abs( + struct x86_function *func, + unsigned xmm ) +{ + sse_andps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_7FFFFFFF_I, + TGSI_EXEC_TEMP_7FFFFFFF_C ) ); +} + +static void +emit_add( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_addps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + static void XSTDCALL cos4f( float *store ) @@ -463,114 +469,95 @@ emit_cos( } static void XSTDCALL -sin4f( +ex24f( float *store ) { #ifdef WIN32 - store[0] = (float) sin( (double) store[0] ); - store[1] = (float) sin( (double) store[1] ); - store[2] = (float) sin( (double) store[2] ); - store[3] = (float) sin( (double) store[3] ); + store[0] = (float) pow( 2.0, (double) store[0] ); + store[1] = (float) pow( 2.0, (double) store[1] ); + store[2] = (float) pow( 2.0, (double) store[2] ); + store[3] = (float) pow( 2.0, (double) store[3] ); #else const unsigned X = TEMP_R0 * 16; - store[X + 0] = sinf( store[X + 0] ); - store[X + 1] = sinf( store[X + 1] ); - store[X + 2] = sinf( store[X + 2] ); - store[X + 3] = sinf( store[X + 3] ); + store[X + 0] = powf( 2.0f, store[X + 0] ); + store[X + 1] = powf( 2.0f, store[X + 1] ); + store[X + 2] = powf( 2.0f, store[X + 2] ); + store[X + 3] = powf( 2.0f, store[X + 3] ); #endif } static void -emit_sin (struct x86_function *func, - unsigned xmm_dst) +emit_ex2( + struct x86_function *func, + unsigned xmm_dst ) { emit_func_call_dst( func, xmm_dst, - sin4f ); + ex24f ); } static void -emit_mov( +emit_f2it( struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - sse_movups( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_mul (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) + unsigned xmm ) { - sse_mulps( + sse2_cvttps2dq( func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); + make_xmm( xmm ), + make_xmm( xmm ) ); } static void XSTDCALL -pow4f( +flr4f( float *store ) { #ifdef WIN32 - store[0] = (float) pow( 
(double) store[0], (double) store[4] ); - store[1] = (float) pow( (double) store[1], (double) store[5] ); - store[2] = (float) pow( (double) store[2], (double) store[6] ); - store[3] = (float) pow( (double) store[3], (double) store[7] ); + const unsigned X = 0; #else const unsigned X = TEMP_R0 * 16; - store[X + 0] = powf( store[X + 0], store[X + 4] ); - store[X + 1] = powf( store[X + 1], store[X + 5] ); - store[X + 2] = powf( store[X + 2], store[X + 6] ); - store[X + 3] = powf( store[X + 3], store[X + 7] ); #endif + store[X + 0] = (float) floor( (double) store[X + 0] ); + store[X + 1] = (float) floor( (double) store[X + 1] ); + store[X + 2] = (float) floor( (double) store[X + 2] ); + store[X + 3] = (float) floor( (double) store[X + 3] ); } static void -emit_pow( +emit_flr( struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) + unsigned xmm_dst ) { - emit_func_call_dst_src( + emit_func_call_dst( func, xmm_dst, - xmm_src, - pow4f ); + flr4f ); } static void XSTDCALL -ex24f( +frc4f( float *store ) { #ifdef WIN32 - store[0] = (float) pow( 2.0, (double) store[0] ); - store[1] = (float) pow( 2.0, (double) store[1] ); - store[2] = (float) pow( 2.0, (double) store[2] ); - store[3] = (float) pow( 2.0, (double) store[3] ); + const unsigned X = 0; #else const unsigned X = TEMP_R0 * 16; - store[X + 0] = powf( 2.0f, store[X + 0] ); - store[X + 1] = powf( 2.0f, store[X + 1] ); - store[X + 2] = powf( 2.0f, store[X + 2] ); - store[X + 3] = powf( 2.0f, store[X + 3] ); #endif + store[X + 0] -= (float) floor( (double) store[X + 0] ); + store[X + 1] -= (float) floor( (double) store[X + 1] ); + store[X + 2] -= (float) floor( (double) store[X + 2] ); + store[X + 3] -= (float) floor( (double) store[X + 3] ); } static void -emit_ex2( +emit_frc( struct x86_function *func, unsigned xmm_dst ) { emit_func_call_dst( func, xmm_dst, - ex24f ); + frc4f ); } static void XSTDCALL @@ -599,56 +586,71 @@ emit_lg2( lg24f ); } -static void XSTDCALL -flr4f( - float *store ) +static void 
+emit_mov( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) { -#ifdef WIN32 - const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif - store[X + 0] = (float) floor( (double) store[X + 0] ); - store[X + 1] = (float) floor( (double) store[X + 1] ); - store[X + 2] = (float) floor( (double) store[X + 2] ); - store[X + 3] = (float) floor( (double) store[X + 3] ); + sse_movups( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); } static void -emit_flr( +emit_mul (struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src) +{ + sse_mulps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_neg( struct x86_function *func, - unsigned xmm_dst ) + unsigned xmm ) { - emit_func_call_dst( + sse_xorps( func, - xmm_dst, - flr4f ); + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); } static void XSTDCALL -frc4f( +pow4f( float *store ) { #ifdef WIN32 - const unsigned X = 0; + store[0] = (float) pow( (double) store[0], (double) store[4] ); + store[1] = (float) pow( (double) store[1], (double) store[5] ); + store[2] = (float) pow( (double) store[2], (double) store[6] ); + store[3] = (float) pow( (double) store[3], (double) store[7] ); #else const unsigned X = TEMP_R0 * 16; + store[X + 0] = powf( store[X + 0], store[X + 4] ); + store[X + 1] = powf( store[X + 1], store[X + 5] ); + store[X + 2] = powf( store[X + 2], store[X + 6] ); + store[X + 3] = powf( store[X + 3], store[X + 7] ); #endif - store[X + 0] -= (float) floor( (double) store[X + 0] ); - store[X + 1] -= (float) floor( (double) store[X + 1] ); - store[X + 2] -= (float) floor( (double) store[X + 2] ); - store[X + 3] -= (float) floor( (double) store[X + 3] ); } static void -emit_frc( +emit_pow( struct x86_function *func, - unsigned xmm_dst ) + unsigned xmm_dst, + unsigned xmm_src ) { - emit_func_call_dst( + emit_func_call_dst_src( func, xmm_dst, - frc4f ); + xmm_src, + pow4f ); } static void @@ -676,6 +678,47 
@@ emit_rsqrt( } static void +emit_setsign( + struct x86_function *func, + unsigned xmm ) +{ + sse_orps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void XSTDCALL +sin4f( + float *store ) +{ +#ifdef WIN32 + store[0] = (float) sin( (double) store[0] ); + store[1] = (float) sin( (double) store[1] ); + store[2] = (float) sin( (double) store[2] ); + store[3] = (float) sin( (double) store[3] ); +#else + const unsigned X = TEMP_R0 * 16; + store[X + 0] = sinf( store[X + 0] ); + store[X + 1] = sinf( store[X + 1] ); + store[X + 2] = sinf( store[X + 2] ); + store[X + 3] = sinf( store[X + 3] ); +#endif +} + +static void +emit_sin (struct x86_function *func, + unsigned xmm_dst) +{ + emit_func_call_dst( + func, + xmm_dst, + sin4f ); +} + +static void emit_sub( struct x86_function *func, unsigned xmm_dst, @@ -687,6 +730,10 @@ emit_sub( make_xmm( xmm_src ) ); } +/** + * Register fetch. + */ + static void emit_fetch( struct x86_function *func, @@ -769,6 +816,13 @@ emit_fetch( } } +#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ + emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + +/** + * Register store. + */ + static void emit_store( struct x86_function *func, @@ -820,6 +874,13 @@ emit_store( } } +#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ + emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + +/** + * High-level instruction translators. 
+ */ + static void emit_kil( struct x86_function *func, @@ -915,12 +976,6 @@ emit_kil( x86_make_reg( file_REG32, reg_AX ) ); } -#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ - emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) - -#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ - emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) - static void emit_setcc( struct x86_function *func, @@ -982,17 +1037,6 @@ emit_cmp( } static void -emit_f2it( - struct x86_function *func, - unsigned xmm ) -{ - sse2_cvttps2dq( - func, - make_xmm( xmm ), - make_xmm( xmm ) ); -} - -static void emit_instruction( struct x86_function *func, struct tgsi_full_instruction *inst ) |