summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-x src/mesa/pipe/tgsi/exec/tgsi_sse2.c 484
1 files changed, 264 insertions, 220 deletions
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
index b8edcf0a2e..abdebd6f97 100755
--- a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
@@ -22,14 +22,9 @@
#define TEMP_R0 TGSI_EXEC_TEMP_R0
-static struct x86_reg
-get_argument(
- unsigned index )
-{
- return x86_make_disp(
- x86_make_reg( file_REG32, reg_SP ),
- (index + 1) * 4 );
-}
+/**
+ * X86 utility functions.
+ */
static struct x86_reg
make_xmm(
@@ -40,6 +35,10 @@ make_xmm(
(enum x86_reg_name) xmm );
}
+/**
+ * X86 register mapping helpers.
+ */
+
static struct x86_reg
get_const_base( void )
{
@@ -49,16 +48,6 @@ get_const_base( void )
}
static struct x86_reg
-get_const(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_const_base(),
- (vec * 4 + chan) * 4 );
-}
-
-static struct x86_reg
get_input_base( void )
{
return x86_make_reg(
@@ -67,55 +56,78 @@ get_input_base( void )
}
static struct x86_reg
-get_input(
- unsigned vec,
- unsigned chan )
+get_output_base( void )
{
- return x86_make_disp(
- get_input_base(),
- (vec * 4 + chan) * 16 );
+ return x86_make_reg(
+ file_REG32,
+ reg_DX );
}
static struct x86_reg
-get_output_base( void )
+get_temp_base( void )
{
return x86_make_reg(
file_REG32,
- reg_DX );
+ reg_BX );
}
static struct x86_reg
-get_output(
+get_coef_base( void )
+{
+ return get_output_base();
+}
+
+/**
+ * Data access helpers.
+ */
+
+static struct x86_reg
+get_argument(
+ unsigned index )
+{
+ return x86_make_disp(
+ x86_make_reg( file_REG32, reg_SP ),
+ (index + 1) * 4 );
+}
+
+static struct x86_reg
+get_const(
unsigned vec,
unsigned chan )
{
return x86_make_disp(
- get_output_base(),
- (vec * 4 + chan) * 16 );
+ get_const_base(),
+ (vec * 4 + chan) * 4 );
}
static struct x86_reg
-get_temp_base( void )
+get_input(
+ unsigned vec,
+ unsigned chan )
{
- return x86_make_reg(
- file_REG32,
- reg_BX );
+ return x86_make_disp(
+ get_input_base(),
+ (vec * 4 + chan) * 16 );
}
static struct x86_reg
-get_temp(
+get_output(
unsigned vec,
unsigned chan )
{
return x86_make_disp(
- get_temp_base(),
+ get_output_base(),
(vec * 4 + chan) * 16 );
}
static struct x86_reg
-get_coef_base( void )
+get_temp(
+ unsigned vec,
+ unsigned chan )
{
- return get_output_base();
+ return x86_make_disp(
+ get_temp_base(),
+ (vec * 4 + chan) * 16 );
}
static struct x86_reg
@@ -129,6 +141,10 @@ get_coef(
((vec * 3 + member) * 4 + chan) * 4 );
}
+/**
+ * Data fetch helpers.
+ */
+
static void
emit_const(
struct x86_function *func,
@@ -161,19 +177,6 @@ emit_inputf(
}
static void
-emit_inputs(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movups(
- func,
- get_input( vec, chan ),
- make_xmm( xmm ) );
-}
-
-static void
emit_output(
struct x86_function *func,
unsigned xmm,
@@ -200,19 +203,6 @@ emit_tempf(
}
static void
-emit_temps(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movaps(
- func,
- get_temp( vec, chan ),
- make_xmm( xmm ) );
-}
-
-static void
emit_coef(
struct x86_function *func,
unsigned xmm,
@@ -231,49 +221,34 @@ emit_coef(
SHUF( 0, 0, 0, 0 ) );
}
-static void
-emit_coef_a0(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_coef(
- func,
- xmm,
- vec,
- chan,
- 0 );
-}
+/**
+ * Data store helpers.
+ */
static void
-emit_coef_dadx(
+emit_inputs(
struct x86_function *func,
unsigned xmm,
unsigned vec,
unsigned chan )
{
- emit_coef(
+ sse_movups(
func,
- xmm,
- vec,
- chan,
- 1 );
+ get_input( vec, chan ),
+ make_xmm( xmm ) );
}
static void
-emit_coef_dady(
+emit_temps(
struct x86_function *func,
unsigned xmm,
unsigned vec,
unsigned chan )
{
- emit_coef(
+ sse_movaps(
func,
- xmm,
- vec,
- chan,
- 2 );
+ get_temp( vec, chan ),
+ make_xmm( xmm ) );
}
static void
@@ -290,57 +265,59 @@ emit_addrs(
chan );
}
-static void
-emit_abs(
- struct x86_function *func,
- unsigned xmm )
-{
- sse_andps(
- func,
- make_xmm( xmm ),
- get_temp(
- TGSI_EXEC_TEMP_7FFFFFFF_I,
- TGSI_EXEC_TEMP_7FFFFFFF_C ) );
-}
+/**
+ * Coefficient fetch helpers.
+ */
static void
-emit_neg(
+emit_coef_a0(
struct x86_function *func,
- unsigned xmm )
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
{
- sse_xorps(
+ emit_coef(
func,
- make_xmm( xmm ),
- get_temp(
- TGSI_EXEC_TEMP_80000000_I,
- TGSI_EXEC_TEMP_80000000_C ) );
+ xmm,
+ vec,
+ chan,
+ 0 );
}
static void
-emit_setsign(
+emit_coef_dadx(
struct x86_function *func,
- unsigned xmm )
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
{
- sse_orps(
+ emit_coef(
func,
- make_xmm( xmm ),
- get_temp(
- TGSI_EXEC_TEMP_80000000_I,
- TGSI_EXEC_TEMP_80000000_C ) );
+ xmm,
+ vec,
+ chan,
+ 1 );
}
static void
-emit_add(
+emit_coef_dady(
struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
{
- sse_addps(
+ emit_coef(
func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
+ xmm,
+ vec,
+ chan,
+ 2 );
}
+/**
+ * Function call helpers.
+ */
+
static void
emit_push_gp(
struct x86_function *func )
@@ -433,6 +410,35 @@ emit_func_call_dst_src(
code );
}
+/**
+ * Low-level instruction translators.
+ */
+
+static void
+emit_abs(
+ struct x86_function *func,
+ unsigned xmm )
+{
+ sse_andps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_7FFFFFFF_I,
+ TGSI_EXEC_TEMP_7FFFFFFF_C ) );
+}
+
+static void
+emit_add(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
+{
+ sse_addps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
+}
+
static void XSTDCALL
cos4f(
float *store )
@@ -463,114 +469,95 @@ emit_cos(
}
static void XSTDCALL
-sin4f(
+ex24f(
float *store )
{
#ifdef WIN32
- store[0] = (float) sin( (double) store[0] );
- store[1] = (float) sin( (double) store[1] );
- store[2] = (float) sin( (double) store[2] );
- store[3] = (float) sin( (double) store[3] );
+ store[0] = (float) pow( 2.0, (double) store[0] );
+ store[1] = (float) pow( 2.0, (double) store[1] );
+ store[2] = (float) pow( 2.0, (double) store[2] );
+ store[3] = (float) pow( 2.0, (double) store[3] );
#else
const unsigned X = TEMP_R0 * 16;
- store[X + 0] = sinf( store[X + 0] );
- store[X + 1] = sinf( store[X + 1] );
- store[X + 2] = sinf( store[X + 2] );
- store[X + 3] = sinf( store[X + 3] );
+ store[X + 0] = powf( 2.0f, store[X + 0] );
+ store[X + 1] = powf( 2.0f, store[X + 1] );
+ store[X + 2] = powf( 2.0f, store[X + 2] );
+ store[X + 3] = powf( 2.0f, store[X + 3] );
#endif
}
static void
-emit_sin (struct x86_function *func,
- unsigned xmm_dst)
+emit_ex2(
+ struct x86_function *func,
+ unsigned xmm_dst )
{
emit_func_call_dst(
func,
xmm_dst,
- sin4f );
+ ex24f );
}
static void
-emit_mov(
+emit_f2it(
struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- sse_movups(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void
-emit_mul (struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src)
+ unsigned xmm )
{
- sse_mulps(
+ sse2_cvttps2dq(
func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
+ make_xmm( xmm ),
+ make_xmm( xmm ) );
}
static void XSTDCALL
-pow4f(
+flr4f(
float *store )
{
#ifdef WIN32
- store[0] = (float) pow( (double) store[0], (double) store[4] );
- store[1] = (float) pow( (double) store[1], (double) store[5] );
- store[2] = (float) pow( (double) store[2], (double) store[6] );
- store[3] = (float) pow( (double) store[3], (double) store[7] );
+ const unsigned X = 0;
#else
const unsigned X = TEMP_R0 * 16;
- store[X + 0] = powf( store[X + 0], store[X + 4] );
- store[X + 1] = powf( store[X + 1], store[X + 5] );
- store[X + 2] = powf( store[X + 2], store[X + 6] );
- store[X + 3] = powf( store[X + 3], store[X + 7] );
#endif
+ store[X + 0] = (float) floor( (double) store[X + 0] );
+ store[X + 1] = (float) floor( (double) store[X + 1] );
+ store[X + 2] = (float) floor( (double) store[X + 2] );
+ store[X + 3] = (float) floor( (double) store[X + 3] );
}
static void
-emit_pow(
+emit_flr(
struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
+ unsigned xmm_dst )
{
- emit_func_call_dst_src(
+ emit_func_call_dst(
func,
xmm_dst,
- xmm_src,
- pow4f );
+ flr4f );
}
static void XSTDCALL
-ex24f(
+frc4f(
float *store )
{
#ifdef WIN32
- store[0] = (float) pow( 2.0, (double) store[0] );
- store[1] = (float) pow( 2.0, (double) store[1] );
- store[2] = (float) pow( 2.0, (double) store[2] );
- store[3] = (float) pow( 2.0, (double) store[3] );
+ const unsigned X = 0;
#else
const unsigned X = TEMP_R0 * 16;
- store[X + 0] = powf( 2.0f, store[X + 0] );
- store[X + 1] = powf( 2.0f, store[X + 1] );
- store[X + 2] = powf( 2.0f, store[X + 2] );
- store[X + 3] = powf( 2.0f, store[X + 3] );
#endif
+ store[X + 0] -= (float) floor( (double) store[X + 0] );
+ store[X + 1] -= (float) floor( (double) store[X + 1] );
+ store[X + 2] -= (float) floor( (double) store[X + 2] );
+ store[X + 3] -= (float) floor( (double) store[X + 3] );
}
static void
-emit_ex2(
+emit_frc(
struct x86_function *func,
unsigned xmm_dst )
{
emit_func_call_dst(
func,
xmm_dst,
- ex24f );
+ frc4f );
}
static void XSTDCALL
@@ -599,56 +586,71 @@ emit_lg2(
lg24f );
}
-static void XSTDCALL
-flr4f(
- float *store )
+static void
+emit_mov(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
{
-#ifdef WIN32
- const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
- store[X + 0] = (float) floor( (double) store[X + 0] );
- store[X + 1] = (float) floor( (double) store[X + 1] );
- store[X + 2] = (float) floor( (double) store[X + 2] );
- store[X + 3] = (float) floor( (double) store[X + 3] );
+ sse_movups(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
}
static void
-emit_flr(
+emit_mul (struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src)
+{
+ sse_mulps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
+}
+
+static void
+emit_neg(
struct x86_function *func,
- unsigned xmm_dst )
+ unsigned xmm )
{
- emit_func_call_dst(
+ sse_xorps(
func,
- xmm_dst,
- flr4f );
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_80000000_I,
+ TGSI_EXEC_TEMP_80000000_C ) );
}
static void XSTDCALL
-frc4f(
+pow4f(
float *store )
{
#ifdef WIN32
- const unsigned X = 0;
+ store[0] = (float) pow( (double) store[0], (double) store[4] );
+ store[1] = (float) pow( (double) store[1], (double) store[5] );
+ store[2] = (float) pow( (double) store[2], (double) store[6] );
+ store[3] = (float) pow( (double) store[3], (double) store[7] );
#else
const unsigned X = TEMP_R0 * 16;
+ store[X + 0] = powf( store[X + 0], store[X + 4] );
+ store[X + 1] = powf( store[X + 1], store[X + 5] );
+ store[X + 2] = powf( store[X + 2], store[X + 6] );
+ store[X + 3] = powf( store[X + 3], store[X + 7] );
#endif
- store[X + 0] -= (float) floor( (double) store[X + 0] );
- store[X + 1] -= (float) floor( (double) store[X + 1] );
- store[X + 2] -= (float) floor( (double) store[X + 2] );
- store[X + 3] -= (float) floor( (double) store[X + 3] );
}
static void
-emit_frc(
+emit_pow(
struct x86_function *func,
- unsigned xmm_dst )
+ unsigned xmm_dst,
+ unsigned xmm_src )
{
- emit_func_call_dst(
+ emit_func_call_dst_src(
func,
xmm_dst,
- frc4f );
+ xmm_src,
+ pow4f );
}
static void
@@ -676,6 +678,47 @@ emit_rsqrt(
}
static void
+emit_setsign(
+ struct x86_function *func,
+ unsigned xmm )
+{
+ sse_orps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_80000000_I,
+ TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void XSTDCALL
+sin4f(
+ float *store )
+{
+#ifdef WIN32
+ store[0] = (float) sin( (double) store[0] );
+ store[1] = (float) sin( (double) store[1] );
+ store[2] = (float) sin( (double) store[2] );
+ store[3] = (float) sin( (double) store[3] );
+#else
+ const unsigned X = TEMP_R0 * 16;
+ store[X + 0] = sinf( store[X + 0] );
+ store[X + 1] = sinf( store[X + 1] );
+ store[X + 2] = sinf( store[X + 2] );
+ store[X + 3] = sinf( store[X + 3] );
+#endif
+}
+
+static void
+emit_sin (struct x86_function *func,
+ unsigned xmm_dst)
+{
+ emit_func_call_dst(
+ func,
+ xmm_dst,
+ sin4f );
+}
+
+static void
emit_sub(
struct x86_function *func,
unsigned xmm_dst,
@@ -687,6 +730,10 @@ emit_sub(
make_xmm( xmm_src ) );
}
+/**
+ * Register fetch.
+ */
+
static void
emit_fetch(
struct x86_function *func,
@@ -769,6 +816,13 @@ emit_fetch(
}
}
+#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
+ emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
+
+/**
+ * Register store.
+ */
+
static void
emit_store(
struct x86_function *func,
@@ -820,6 +874,13 @@ emit_store(
}
}
+#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
+ emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
+
+/**
+ * High-level instruction translators.
+ */
+
static void
emit_kil(
struct x86_function *func,
@@ -915,12 +976,6 @@ emit_kil(
x86_make_reg( file_REG32, reg_AX ) );
}
-#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
- emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
-
-#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
- emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
-
static void
emit_setcc(
struct x86_function *func,
@@ -982,17 +1037,6 @@ emit_cmp(
}
static void
-emit_f2it(
- struct x86_function *func,
- unsigned xmm )
-{
- sse2_cvttps2dq(
- func,
- make_xmm( xmm ),
- make_xmm( xmm ) );
-}
-
-static void
emit_instruction(
struct x86_function *func,
struct tgsi_full_instruction *inst )