diff options
Diffstat (limited to 'src/mesa/pipe')
| -rwxr-xr-x | src/mesa/pipe/tgsi/exec/tgsi_sse2.c | 484 | 
1 files changed, 264 insertions, 220 deletions
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c index b8edcf0a2e..abdebd6f97 100755 --- a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c +++ b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c @@ -22,14 +22,9 @@  #define TEMP_R0   TGSI_EXEC_TEMP_R0 -static struct x86_reg -get_argument( -   unsigned index ) -{ -   return x86_make_disp( -      x86_make_reg( file_REG32, reg_SP ), -      (index + 1) * 4 ); -} +/** + * X86 utility functions. + */  static struct x86_reg  make_xmm( @@ -40,6 +35,10 @@ make_xmm(        (enum x86_reg_name) xmm );  } +/** + * X86 register mapping helpers. + */ +  static struct x86_reg  get_const_base( void )  { @@ -49,16 +48,6 @@ get_const_base( void )  }  static struct x86_reg -get_const( -   unsigned vec, -   unsigned chan ) -{ -   return x86_make_disp( -      get_const_base(), -      (vec * 4 + chan) * 4 ); -} - -static struct x86_reg  get_input_base( void )  {     return x86_make_reg( @@ -67,55 +56,78 @@ get_input_base( void )  }  static struct x86_reg -get_input( -   unsigned vec, -   unsigned chan ) +get_output_base( void )  { -   return x86_make_disp( -      get_input_base(), -      (vec * 4 + chan) * 16 ); +   return x86_make_reg( +      file_REG32, +      reg_DX );  }  static struct x86_reg -get_output_base( void ) +get_temp_base( void )  {     return x86_make_reg(        file_REG32, -      reg_DX ); +      reg_BX );  }  static struct x86_reg -get_output( +get_coef_base( void ) +{ +   return get_output_base(); +} + +/** + * Data access helpers. 
+ */ + +static struct x86_reg +get_argument( +   unsigned index ) +{ +   return x86_make_disp( +      x86_make_reg( file_REG32, reg_SP ), +      (index + 1) * 4 ); +} + +static struct x86_reg +get_const(     unsigned vec,     unsigned chan )  {     return x86_make_disp( -      get_output_base(), -      (vec * 4 + chan) * 16 ); +      get_const_base(), +      (vec * 4 + chan) * 4 );  }  static struct x86_reg -get_temp_base( void ) +get_input( +   unsigned vec, +   unsigned chan )  { -   return x86_make_reg( -      file_REG32, -      reg_BX ); +   return x86_make_disp( +      get_input_base(), +      (vec * 4 + chan) * 16 );  }  static struct x86_reg -get_temp( +get_output(     unsigned vec,     unsigned chan )  {     return x86_make_disp( -      get_temp_base(), +      get_output_base(),        (vec * 4 + chan) * 16 );  }  static struct x86_reg -get_coef_base( void ) +get_temp( +   unsigned vec, +   unsigned chan )  { -   return get_output_base(); +   return x86_make_disp( +      get_temp_base(), +      (vec * 4 + chan) * 16 );  }  static struct x86_reg @@ -129,6 +141,10 @@ get_coef(        ((vec * 3 + member) * 4 + chan) * 4 );  } +/** + * Data fetch helpers. 
+ */ +  static void  emit_const(     struct x86_function *func, @@ -161,19 +177,6 @@ emit_inputf(  }  static void -emit_inputs( -   struct x86_function *func, -   unsigned xmm, -   unsigned vec, -   unsigned chan ) -{ -   sse_movups( -      func, -      get_input( vec, chan ), -      make_xmm( xmm ) ); -} - -static void  emit_output(     struct x86_function *func,     unsigned xmm, @@ -200,19 +203,6 @@ emit_tempf(  }  static void -emit_temps( -   struct x86_function *func, -   unsigned xmm, -   unsigned vec, -   unsigned chan ) -{ -   sse_movaps( -      func, -      get_temp( vec, chan ), -      make_xmm( xmm ) ); -} - -static void  emit_coef(     struct x86_function *func,     unsigned xmm, @@ -231,49 +221,34 @@ emit_coef(        SHUF( 0, 0, 0, 0 ) );  } -static void -emit_coef_a0( -   struct x86_function *func, -   unsigned xmm, -   unsigned vec, -   unsigned chan ) -{ -   emit_coef( -      func, -      xmm, -      vec, -      chan, -      0 ); -} +/** + * Data store helpers. + */  static void -emit_coef_dadx( +emit_inputs(     struct x86_function *func,     unsigned xmm,     unsigned vec,     unsigned chan )  { -   emit_coef( +   sse_movups(        func, -      xmm, -      vec, -      chan, -      1 ); +      get_input( vec, chan ), +      make_xmm( xmm ) );  }  static void -emit_coef_dady( +emit_temps(     struct x86_function *func,     unsigned xmm,     unsigned vec,     unsigned chan )  { -   emit_coef( +   sse_movaps(        func, -      xmm, -      vec, -      chan, -      2 ); +      get_temp( vec, chan ), +      make_xmm( xmm ) );  }  static void @@ -290,57 +265,59 @@ emit_addrs(        chan );  } -static void -emit_abs( -   struct x86_function *func, -   unsigned xmm ) -{ -   sse_andps( -      func, -      make_xmm( xmm ), -      get_temp( -         TGSI_EXEC_TEMP_7FFFFFFF_I, -         TGSI_EXEC_TEMP_7FFFFFFF_C ) ); -} +/** + * Coefficient fetch helpers. 
+ */  static void -emit_neg( +emit_coef_a0(     struct x86_function *func, -   unsigned xmm ) +   unsigned xmm, +   unsigned vec, +   unsigned chan )  { -   sse_xorps( +   emit_coef(        func, -      make_xmm( xmm ), -      get_temp( -         TGSI_EXEC_TEMP_80000000_I, -         TGSI_EXEC_TEMP_80000000_C ) ); +      xmm, +      vec, +      chan, +      0 );  }  static void -emit_setsign( +emit_coef_dadx(     struct x86_function *func, -   unsigned xmm ) +   unsigned xmm, +   unsigned vec, +   unsigned chan )  { -   sse_orps( +   emit_coef(        func, -      make_xmm( xmm ), -      get_temp( -         TGSI_EXEC_TEMP_80000000_I, -         TGSI_EXEC_TEMP_80000000_C ) ); +      xmm, +      vec, +      chan, +      1 );  }  static void -emit_add( +emit_coef_dady(     struct x86_function *func, -   unsigned xmm_dst, -   unsigned xmm_src ) +   unsigned xmm, +   unsigned vec, +   unsigned chan )  { -   sse_addps( +   emit_coef(        func, -      make_xmm( xmm_dst ), -      make_xmm( xmm_src ) ); +      xmm, +      vec, +      chan, +      2 );  } +/** + * Function call helpers. + */ +  static void  emit_push_gp(     struct x86_function *func ) @@ -433,6 +410,35 @@ emit_func_call_dst_src(        code );  } +/** + * Low-level instruction translators. 
+ */ + +static void +emit_abs( +   struct x86_function *func, +   unsigned xmm ) +{ +   sse_andps( +      func, +      make_xmm( xmm ), +      get_temp( +         TGSI_EXEC_TEMP_7FFFFFFF_I, +         TGSI_EXEC_TEMP_7FFFFFFF_C ) ); +} + +static void +emit_add( +   struct x86_function *func, +   unsigned xmm_dst, +   unsigned xmm_src ) +{ +   sse_addps( +      func, +      make_xmm( xmm_dst ), +      make_xmm( xmm_src ) ); +} +  static void XSTDCALL  cos4f(     float *store ) @@ -463,114 +469,95 @@ emit_cos(  }  static void XSTDCALL -sin4f( +ex24f(     float *store )  {  #ifdef WIN32 -   store[0] = (float) sin( (double) store[0] ); -   store[1] = (float) sin( (double) store[1] ); -   store[2] = (float) sin( (double) store[2] ); -   store[3] = (float) sin( (double) store[3] ); +   store[0] = (float) pow( 2.0, (double) store[0] ); +   store[1] = (float) pow( 2.0, (double) store[1] ); +   store[2] = (float) pow( 2.0, (double) store[2] ); +   store[3] = (float) pow( 2.0, (double) store[3] );  #else     const unsigned X = TEMP_R0 * 16; -   store[X + 0] = sinf( store[X + 0] ); -   store[X + 1] = sinf( store[X + 1] ); -   store[X + 2] = sinf( store[X + 2] ); -   store[X + 3] = sinf( store[X + 3] ); +   store[X + 0] = powf( 2.0f, store[X + 0] ); +   store[X + 1] = powf( 2.0f, store[X + 1] ); +   store[X + 2] = powf( 2.0f, store[X + 2] ); +   store[X + 3] = powf( 2.0f, store[X + 3] );  #endif  }  static void -emit_sin (struct x86_function *func, -          unsigned xmm_dst) +emit_ex2( +   struct x86_function *func, +   unsigned xmm_dst )  {     emit_func_call_dst(        func,        xmm_dst, -      sin4f ); +      ex24f );  }  static void -emit_mov( +emit_f2it(     struct x86_function *func, -   unsigned xmm_dst, -   unsigned xmm_src ) -{ -   sse_movups( -      func, -      make_xmm( xmm_dst ), -      make_xmm( xmm_src ) ); -} - -static void -emit_mul (struct x86_function *func, -          unsigned xmm_dst, -          unsigned xmm_src) +   unsigned xmm )  { -   sse_mulps( +  
 sse2_cvttps2dq(        func, -      make_xmm( xmm_dst ), -      make_xmm( xmm_src ) ); +      make_xmm( xmm ), +      make_xmm( xmm ) );  }  static void XSTDCALL -pow4f( +flr4f(     float *store )  {  #ifdef WIN32 -   store[0] = (float) pow( (double) store[0], (double) store[4] ); -   store[1] = (float) pow( (double) store[1], (double) store[5] ); -   store[2] = (float) pow( (double) store[2], (double) store[6] ); -   store[3] = (float) pow( (double) store[3], (double) store[7] ); +   const unsigned X = 0;  #else     const unsigned X = TEMP_R0 * 16; -   store[X + 0] = powf( store[X + 0], store[X + 4] ); -   store[X + 1] = powf( store[X + 1], store[X + 5] ); -   store[X + 2] = powf( store[X + 2], store[X + 6] ); -   store[X + 3] = powf( store[X + 3], store[X + 7] );  #endif +   store[X + 0] = (float) floor( (double) store[X + 0] ); +   store[X + 1] = (float) floor( (double) store[X + 1] ); +   store[X + 2] = (float) floor( (double) store[X + 2] ); +   store[X + 3] = (float) floor( (double) store[X + 3] );  }  static void -emit_pow( +emit_flr(     struct x86_function *func, -   unsigned xmm_dst, -   unsigned xmm_src ) +   unsigned xmm_dst )  { -   emit_func_call_dst_src( +   emit_func_call_dst(        func,        xmm_dst, -      xmm_src, -      pow4f ); +      flr4f );  }  static void XSTDCALL -ex24f( +frc4f(     float *store )  {  #ifdef WIN32 -   store[0] = (float) pow( 2.0, (double) store[0] ); -   store[1] = (float) pow( 2.0, (double) store[1] ); -   store[2] = (float) pow( 2.0, (double) store[2] ); -   store[3] = (float) pow( 2.0, (double) store[3] ); +   const unsigned X = 0;  #else     const unsigned X = TEMP_R0 * 16; -   store[X + 0] = powf( 2.0f, store[X + 0] ); -   store[X + 1] = powf( 2.0f, store[X + 1] ); -   store[X + 2] = powf( 2.0f, store[X + 2] ); -   store[X + 3] = powf( 2.0f, store[X + 3] );  #endif +   store[X + 0] -= (float) floor( (double) store[X + 0] ); +   store[X + 1] -= (float) floor( (double) store[X + 1] ); +   store[X + 2] -= (float) 
floor( (double) store[X + 2] ); +   store[X + 3] -= (float) floor( (double) store[X + 3] );  }  static void -emit_ex2( +emit_frc(     struct x86_function *func,     unsigned xmm_dst )  {     emit_func_call_dst(        func,        xmm_dst, -      ex24f ); +      frc4f );  }  static void XSTDCALL @@ -599,56 +586,71 @@ emit_lg2(        lg24f );  } -static void XSTDCALL -flr4f( -   float *store ) +static void +emit_mov( +   struct x86_function *func, +   unsigned xmm_dst, +   unsigned xmm_src )  { -#ifdef WIN32 -   const unsigned X = 0; -#else -   const unsigned X = TEMP_R0 * 16; -#endif -   store[X + 0] = (float) floor( (double) store[X + 0] ); -   store[X + 1] = (float) floor( (double) store[X + 1] ); -   store[X + 2] = (float) floor( (double) store[X + 2] ); -   store[X + 3] = (float) floor( (double) store[X + 3] ); +   sse_movups( +      func, +      make_xmm( xmm_dst ), +      make_xmm( xmm_src ) );  }  static void -emit_flr( +emit_mul (struct x86_function *func, +          unsigned xmm_dst, +          unsigned xmm_src) +{ +   sse_mulps( +      func, +      make_xmm( xmm_dst ), +      make_xmm( xmm_src ) ); +} + +static void +emit_neg(     struct x86_function *func, -   unsigned xmm_dst ) +   unsigned xmm )  { -   emit_func_call_dst( +   sse_xorps(        func, -      xmm_dst, -      flr4f ); +      make_xmm( xmm ), +      get_temp( +         TGSI_EXEC_TEMP_80000000_I, +         TGSI_EXEC_TEMP_80000000_C ) );  }  static void XSTDCALL -frc4f( +pow4f(     float *store )  {  #ifdef WIN32 -   const unsigned X = 0; +   store[0] = (float) pow( (double) store[0], (double) store[4] ); +   store[1] = (float) pow( (double) store[1], (double) store[5] ); +   store[2] = (float) pow( (double) store[2], (double) store[6] ); +   store[3] = (float) pow( (double) store[3], (double) store[7] );  #else     const unsigned X = TEMP_R0 * 16; +   store[X + 0] = powf( store[X + 0], store[X + 4] ); +   store[X + 1] = powf( store[X + 1], store[X + 5] ); +   store[X + 2] = powf( store[X + 
2], store[X + 6] ); +   store[X + 3] = powf( store[X + 3], store[X + 7] );  #endif -   store[X + 0] -= (float) floor( (double) store[X + 0] ); -   store[X + 1] -= (float) floor( (double) store[X + 1] ); -   store[X + 2] -= (float) floor( (double) store[X + 2] ); -   store[X + 3] -= (float) floor( (double) store[X + 3] );  }  static void -emit_frc( +emit_pow(     struct x86_function *func, -   unsigned xmm_dst ) +   unsigned xmm_dst, +   unsigned xmm_src )  { -   emit_func_call_dst( +   emit_func_call_dst_src(        func,        xmm_dst, -      frc4f ); +      xmm_src, +      pow4f );  }  static void @@ -676,6 +678,47 @@ emit_rsqrt(  }  static void +emit_setsign( +   struct x86_function *func, +   unsigned xmm ) +{ +   sse_orps( +      func, +      make_xmm( xmm ), +      get_temp( +         TGSI_EXEC_TEMP_80000000_I, +         TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void XSTDCALL +sin4f( +   float *store ) +{ +#ifdef WIN32 +   store[0] = (float) sin( (double) store[0] ); +   store[1] = (float) sin( (double) store[1] ); +   store[2] = (float) sin( (double) store[2] ); +   store[3] = (float) sin( (double) store[3] ); +#else +   const unsigned X = TEMP_R0 * 16; +   store[X + 0] = sinf( store[X + 0] ); +   store[X + 1] = sinf( store[X + 1] ); +   store[X + 2] = sinf( store[X + 2] ); +   store[X + 3] = sinf( store[X + 3] ); +#endif +} + +static void +emit_sin (struct x86_function *func, +          unsigned xmm_dst) +{ +   emit_func_call_dst( +      func, +      xmm_dst, +      sin4f ); +} + +static void  emit_sub(     struct x86_function *func,     unsigned xmm_dst, @@ -687,6 +730,10 @@ emit_sub(        make_xmm( xmm_src ) );  } +/** + * Register fetch. + */ +  static void  emit_fetch(     struct x86_function *func, @@ -769,6 +816,13 @@ emit_fetch(     }  } +#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ +   emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + +/** + * Register store. 
+ */ +  static void  emit_store(     struct x86_function *func, @@ -820,6 +874,13 @@ emit_store(     }  } +#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ +   emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + +/** + * High-level instruction translators. + */ +  static void  emit_kil(     struct x86_function *func, @@ -915,12 +976,6 @@ emit_kil(        x86_make_reg( file_REG32, reg_AX ) );  } -#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ -   emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) - -#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ -   emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) -  static void  emit_setcc(     struct x86_function *func, @@ -982,17 +1037,6 @@ emit_cmp(  }  static void -emit_f2it( -   struct x86_function *func, -   unsigned xmm ) -{ -   sse2_cvttps2dq( -      func, -      make_xmm( xmm ), -      make_xmm( xmm ) ); -} - -static void  emit_instruction(     struct x86_function *func,     struct tgsi_full_instruction *inst )  | 
