diff options
Diffstat (limited to 'src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c')
-rwxr-xr-x | src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 196 |
1 files changed, 91 insertions, 105 deletions
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 1138f59997..2fd76a3072 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -103,15 +103,9 @@ get_output_base( void ) static struct x86_reg get_temp_base( void ) { -#ifdef WIN32 return x86_make_reg( file_REG32, reg_BX ); -#else - return x86_make_reg( - file_REG32, - reg_SI ); -#endif } static struct x86_reg @@ -133,14 +127,6 @@ get_immediate_base( void ) * Data access helpers. */ -static struct x86_reg -get_argument( - unsigned index ) -{ - return x86_make_disp( - x86_make_reg( file_REG32, reg_SP ), - (index + 1) * 4 ); -} static struct x86_reg get_immediate( @@ -455,19 +441,13 @@ emit_push_gp( { x86_push( func, - get_const_base() ); + x86_make_reg( file_REG32, reg_AX) ); x86_push( func, - get_input_base() ); + x86_make_reg( file_REG32, reg_CX) ); x86_push( func, - get_output_base() ); - - /* It is important on non-win32 platforms that temp base is pushed last. - */ - x86_push( - func, - get_temp_base() ); + x86_make_reg( file_REG32, reg_DX) ); } static void @@ -478,16 +458,13 @@ x86_pop_gp( */ x86_pop( func, - get_temp_base() ); - x86_pop( - func, - get_output_base() ); + x86_make_reg( file_REG32, reg_DX) ); x86_pop( func, - get_input_base() ); + x86_make_reg( file_REG32, reg_CX) ); x86_pop( func, - get_const_base() ); + x86_make_reg( file_REG32, reg_AX) ); } static void @@ -504,19 +481,23 @@ emit_func_call_dst( emit_push_gp( func ); -#ifdef WIN32 - x86_push( - func, - get_temp( TEMP_R0, 0 ) ); -#endif - { struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); x86_mov_reg_imm( func, ecx, (unsigned long) code ); x86_call( func, ecx ); +#ifndef WIN32 + x86_pop(func, ecx ); +#endif } + x86_pop_gp( func ); @@ -577,11 +558,7 @@ static void XSTDCALL cos4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif store[X + 0] = cosf( store[X + 0] ); store[X + 1] = cosf( store[X + 1] ); @@ -604,11 +581,8 @@ static void XSTDCALL ex24f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = powf( 2.0f, store[X + 0] ); store[X + 1] = powf( 2.0f, store[X + 1] ); store[X + 2] = powf( 2.0f, store[X + 2] ); @@ -641,11 +615,8 @@ static void XSTDCALL flr4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = floorf( store[X + 0] ); store[X + 1] = floorf( store[X + 1] ); store[X + 2] = floorf( store[X + 2] ); @@ -667,11 +638,8 @@ static void XSTDCALL frc4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] -= floorf( store[X + 0] ); store[X + 1] -= floorf( store[X + 1] ); store[X + 2] -= floorf( store[X + 2] ); @@ -693,11 +661,8 @@ static void XSTDCALL lg24f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = LOG2( store[X + 0] ); store[X + 1] = LOG2( store[X + 1] ); store[X + 2] = LOG2( store[X + 2] ); @@ -755,11 +720,8 @@ static void XSTDCALL pow4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = powf( store[X + 0], store[X + 4] ); store[X + 1] = powf( store[X + 1], store[X + 5] ); store[X + 2] = powf( store[X + 2], store[X + 6] ); @@ -800,11 +762,8 @@ static void XSTDCALL rsqrt4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = 1.0F / sqrtf( store[X + 0] ); store[X + 1] = 1.0F / sqrtf( store[X + 1] ); store[X + 2] = 1.0F / sqrtf( store[X + 2] ); @@ -878,11 +837,8 @@ static void XSTDCALL sin4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = sinf( store[X + 0] ); store[X + 1] = sinf( store[X + 1] ); store[X + 2] = sinf( store[X + 2] ); @@ -1234,11 +1190,16 @@ emit_instruction( switch( inst->Instruction.Opcode ) { case TGSI_OPCODE_ARL: +#if 0 + /* XXX this isn't working properly (see glean vertProg1 test) */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } +#else + return 0; +#endif break; case TGSI_OPCODE_MOV: @@ -2029,40 +1990,40 @@ emit_declaration( } } -static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +static void aos_to_soa( struct x86_function *func, + uint arg_aos, + uint arg_soa, + uint arg_num, + uint arg_stride ) { - struct x86_reg soa_input; - struct x86_reg aos_input; - struct x86_reg num_inputs; - struct x86_reg temp; - unsigned char *inner_loop; + struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); + struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); + struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); + int inner_loop; - soa_input = x86_make_reg( file_REG32, reg_AX ); - aos_input = x86_make_reg( file_REG32, reg_BX ); - num_inputs = x86_make_reg( file_REG32, reg_CX ); - temp = x86_make_reg( file_REG32, reg_DX ); /* Save EBX */ x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); - x86_mov( func, soa_input, get_argument( soa + 1 ) ); - x86_mov( func, aos_input, get_argument( aos + 1 ) ); - x86_mov( func, num_inputs, get_argument( num + 1 ) ); + x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); + x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); + x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); + x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); /* do */ inner_loop = x86_get_label( func ); { - x86_mov( func, temp, get_argument( stride + 1 ) ); x86_push( func, aos_input ); sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, temp ); + x86_add( func, aos_input, stride ); sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, temp ); + x86_add( func, aos_input, stride ); sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, temp ); + x86_add( func, aos_input, stride ); sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); x86_pop( func, aos_input ); @@ -2088,7 +2049,7 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, x86_jcc( func, cc_NE, inner_loop ); /* Restore EBX */ - x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); + x86_pop( func, aos_input ); } static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) @@ -2097,7 +2058,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, struct x86_reg aos_output; struct x86_reg num_outputs; struct x86_reg temp; - unsigned char *inner_loop; + int inner_loop; soa_output = x86_make_reg( file_REG32, reg_AX ); aos_output = x86_make_reg( file_REG32, reg_BX ); @@ -2105,11 +2066,11 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, temp = x86_make_reg( file_REG32, reg_DX ); /* Save EBX */ - x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + x86_push( func, aos_output ); - x86_mov( func, soa_output, get_argument( soa + 1 ) ); - x86_mov( func, aos_output, get_argument( aos + 1 ) ); - x86_mov( func, num_outputs, get_argument( num + 1 ) ); + x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); + x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); + x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); /* do */ inner_loop = x86_get_label( func ); @@ -2126,7 +2087,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); - x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_mov( func, temp, x86_fn_arg( func, stride ) ); x86_push( func, aos_output ); sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); @@ -2150,7 +2111,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, x86_jcc( func, cc_NE, inner_loop ); /* Restore EBX */ - x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); + x86_pop( func, aos_output ); } /** @@ -2185,6 +2146,17 @@ tgsi_emit_sse2( tgsi_parse_init( &parse, tokens ); + /* Can't just use EDI, EBX without save/restoring them: + */ + x86_push( + func, + get_immediate_base() ); + + x86_push( + func, + get_temp_base() ); + + /* * Different function args for vertex/fragment shaders: */ @@ -2193,51 +2165,55 @@ tgsi_emit_sse2( x86_mov( func, get_input_base(), - get_argument( 0 ) ); + x86_fn_arg( func, 1 ) ); /* skipping outputs argument here */ x86_mov( func, get_const_base(), - get_argument( 2 ) ); + x86_fn_arg( func, 3 ) ); x86_mov( func, get_temp_base(), - get_argument( 3 ) ); + x86_fn_arg( func, 4 ) ); x86_mov( func, get_coef_base(), - get_argument( 4 ) ); + x86_fn_arg( func, 5 ) ); x86_mov( func, get_immediate_base(), - get_argument( 5 ) ); + x86_fn_arg( func, 6 ) ); } else { assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); if (do_swizzles) - aos_to_soa( func, 5, 0, 6, 7 ); + aos_to_soa( func, + 6, /* aos_input */ + 1, /* machine->input */ + 7, /* num_inputs */ + 8 ); /* input_stride */ x86_mov( func, get_input_base(), - get_argument( 0 ) ); + x86_fn_arg( func, 1 ) ); x86_mov( func, get_output_base(), - get_argument( 1 ) ); + x86_fn_arg( func, 2 ) ); x86_mov( func, get_const_base(), - get_argument( 2 ) ); + x86_fn_arg( func, 3 ) ); x86_mov( func, get_temp_base(), - get_argument( 3 ) ); + x86_fn_arg( func, 4 ) ); x86_mov( func, get_immediate_base(), - get_argument( 4 ) ); + x86_fn_arg( func, 5 ) ); } while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { @@ -2260,7 +2236,7 @@ tgsi_emit_sse2( x86_mov( func, get_output_base(), - get_argument( 1 ) ); + x86_fn_arg( func, 2 ) ); } } @@ -2307,9 +2283,19 @@ tgsi_emit_sse2( if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { if (do_swizzles) - soa_to_aos( func, 8, 1, 9, 10 ); + soa_to_aos( func, 9, 2, 10, 11 ); } + /* Can't just use EBX, EDI without save/restoring them: + */ + x86_pop( + func, + get_temp_base() ); + + x86_pop( + func, + get_immediate_base() ); + #ifdef WIN32 emit_retw( func, 16 ); #else |