diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 202 |
| -rw-r--r-- | src/gallium/drivers/softpipe/sp_fs_sse.c | 1 |
2 files changed, 184 insertions, 19 deletions
| diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 5928f874a9..e43bee00a9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -101,7 +101,7 @@ get_const_base( void )  {     return x86_make_reg(        file_REG32, -      reg_CX ); +      reg_AX );  }  static struct x86_reg @@ -109,7 +109,7 @@ get_machine_base( void )  {     return x86_make_reg(        file_REG32, -      reg_AX ); +      reg_CX );  }  static struct x86_reg @@ -145,6 +145,14 @@ get_coef_base( void )  }  static struct x86_reg +get_sampler_base( void ) +{ +   return x86_make_reg( +      file_REG32, +      reg_DI ); +} + +static struct x86_reg  get_immediate_base( void )  {     return x86_make_reg( @@ -179,6 +187,15 @@ get_const(  }  static struct x86_reg +get_sampler_ptr( +   unsigned unit ) +{ +   return x86_make_disp( +      get_sampler_base(), +      unit * sizeof( struct tgsi_sampler * ) ); +} + +static struct x86_reg  get_input(     unsigned vec,     unsigned chan ) @@ -1222,6 +1239,12 @@ emit_sub(        make_xmm( xmm_src ) );  } + + + + + +  /**   * Register fetch.   
*/ @@ -1380,11 +1403,156 @@ emit_store(  #define STORE( FUNC, INST, XMM, INDEX, CHAN )\     emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + +static void PIPE_CDECL +fetch_texel( struct tgsi_sampler **sampler, +             float *store ) +{ +#if 0 +   uint j; + +   debug_printf("%s sampler: %p (%p) store: %p\n",  +                __FUNCTION__, +                sampler, *sampler, +                store ); + +   debug_printf("lodbias %f\n", store[12]); + +   for (j = 0; j < 4; j++) +      debug_printf("sample %d texcoord %f %f\n",  +                   j,  +                   store[0+j], +                   store[4+j]); +#endif + +   { +      float rgba[NUM_CHANNELS][QUAD_SIZE]; +      (*sampler)->get_samples(*sampler,  +                              &store[0],  +                              &store[4],  +                              &store[8],  +                              0.0f, /*store[12],  lodbias */ +                              rgba); + +      memcpy( store, rgba, 16 * sizeof(float)); +   } + +#if 0 +   for (j = 0; j < 4; j++) +      debug_printf("sample %d result %f %f %f %f\n",  +                   j,  +                   store[0+j], +                   store[4+j], +                   store[8+j], +                   store[12+j]); +#endif +} +  /**   * High-level instruction translators.   
*/  static void +emit_tex( struct x86_function *func, +          const struct tgsi_full_instruction *inst, +          boolean lodbias, +          boolean projected) +{ +   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; +   struct x86_reg args[2]; +   unsigned count; +   unsigned i; + +   switch (inst->InstructionExtTexture.Texture) { +   case TGSI_TEXTURE_1D: +   case TGSI_TEXTURE_SHADOW1D: +      count = 1; +      break; +   case TGSI_TEXTURE_2D: +   case TGSI_TEXTURE_RECT: +   case TGSI_TEXTURE_SHADOW2D: +   case TGSI_TEXTURE_SHADOWRECT: +      count = 2; +      break; +   case TGSI_TEXTURE_3D: +   case TGSI_TEXTURE_CUBE: +      count = 3; +      break; +   default: +      assert(0); +      return; +   } + +   if (lodbias) { +      FETCH( func, *inst, 3, 0, 3 ); +   } +   else { +      emit_tempf( +         func, +         3, +         TGSI_EXEC_TEMP_00000000_I, +         TGSI_EXEC_TEMP_00000000_C ); + +   } + +   /* store lodbias whether enabled or not -- fetch_texel currently +    * respects it always. +    */ +   sse_movaps( func, +               get_temp( TEMP_R0, 3 ), +               make_xmm( 3 ) ); + +    +   if (projected) { +      FETCH( func, *inst, 3, 0, 3 ); + +      emit_rcp( func, 3, 3 ); +   } + +   for (i = 0; i < count; i++) { +      FETCH( func, *inst, i, 0, i ); + +      if (projected) { +         sse_mulps( +            func, +            make_xmm( i ), +            make_xmm( 3 ) ); +      } +       +      /* Store in the argument buffer: +       */ +      sse_movaps( +         func, +         get_temp( TEMP_R0, i ), +         make_xmm( i ) ); +   } + +   args[0] = get_temp( TEMP_R0, 0 ); +   args[1] = get_sampler_ptr( unit ); + + +   emit_func_call( func, +                   0, +                   args, +                   Elements(args), +                   fetch_texel ); + +   /* If all four channels are enabled, could use a pointer to +    * dst[0].x instead of TEMP_R0 for store? 
+    */ +   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, i ) { + +      sse_movaps( +         func, +         make_xmm( 0 ), +         get_temp( TEMP_R0, i ) ); + +      STORE( func, *inst, 0, 0, i ); +   } +} + + +static void  emit_kil(     struct x86_function *func,     const struct tgsi_full_src_register *reg ) @@ -2168,21 +2336,7 @@ emit_instruction(        break;     case TGSI_OPCODE_TEX: -      if (0) { -	 /* Disable dummy texture code:  -	  */ -	 emit_tempf( -	    func, -	    0, -	    TEMP_ONE_I, -	    TEMP_ONE_C ); -	 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { -	    STORE( func, *inst, 0, 0, chan_index ); -	 } -      } -      else { -	 return 0; -      } +      emit_tex( func, inst, FALSE, FALSE );        break;     case TGSI_OPCODE_TXD: @@ -2280,7 +2434,7 @@ emit_instruction(        break;     case TGSI_OPCODE_TXB: -      return 0; +      emit_tex( func, inst, TRUE, FALSE );        break;     case TGSI_OPCODE_NRM: @@ -2388,9 +2542,13 @@ emit_instruction(        break;     case TGSI_OPCODE_TXL: -      return 0; +      emit_tex( func, inst, TRUE, FALSE );        break; +   case TGSI_OPCODE_TXP: +      emit_tex( func, inst, FALSE, TRUE ); +      break; +           case TGSI_OPCODE_BRK:        return 0;        break; @@ -2758,6 +2916,12 @@ tgsi_emit_sse2(  	 func,  	 get_coef_base(),  	 x86_fn_arg( func, 4 ) ); + +      x86_mov( +	 func, +	 get_sampler_base(), +	 x86_make_disp( get_machine_base(), +                        Offset( struct tgsi_exec_machine, Samplers ) ) );     } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index f9362efcb7..f4fa0905d7 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -72,6 +72,7 @@ fs_sse_prepare( const struct sp_fragment_shader *base,  		struct tgsi_exec_machine *machine,  		struct tgsi_sampler **samplers )  { +   machine->Samplers = samplers;  } | 
