summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c')
-rwxr-xr-xsrc/gallium/auxiliary/tgsi/exec/tgsi_sse2.c196
1 files changed, 91 insertions, 105 deletions
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 1138f59997..2fd76a3072 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -103,15 +103,9 @@ get_output_base( void )
static struct x86_reg
get_temp_base( void )
{
-#ifdef WIN32
return x86_make_reg(
file_REG32,
reg_BX );
-#else
- return x86_make_reg(
- file_REG32,
- reg_SI );
-#endif
}
static struct x86_reg
@@ -133,14 +127,6 @@ get_immediate_base( void )
* Data access helpers.
*/
-static struct x86_reg
-get_argument(
- unsigned index )
-{
- return x86_make_disp(
- x86_make_reg( file_REG32, reg_SP ),
- (index + 1) * 4 );
-}
static struct x86_reg
get_immediate(
@@ -455,19 +441,13 @@ emit_push_gp(
{
x86_push(
func,
- get_const_base() );
+ x86_make_reg( file_REG32, reg_AX) );
x86_push(
func,
- get_input_base() );
+ x86_make_reg( file_REG32, reg_CX) );
x86_push(
func,
- get_output_base() );
-
- /* It is important on non-win32 platforms that temp base is pushed last.
- */
- x86_push(
- func,
- get_temp_base() );
+ x86_make_reg( file_REG32, reg_DX) );
}
static void
@@ -478,16 +458,13 @@ x86_pop_gp(
*/
x86_pop(
func,
- get_temp_base() );
- x86_pop(
- func,
- get_output_base() );
+ x86_make_reg( file_REG32, reg_DX) );
x86_pop(
func,
- get_input_base() );
+ x86_make_reg( file_REG32, reg_CX) );
x86_pop(
func,
- get_const_base() );
+ x86_make_reg( file_REG32, reg_AX) );
}
static void
@@ -504,19 +481,23 @@ emit_func_call_dst(
emit_push_gp(
func );
-#ifdef WIN32
- x86_push(
- func,
- get_temp( TEMP_R0, 0 ) );
-#endif
-
{
struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+ x86_lea(
+ func,
+ ecx,
+ get_temp( TEMP_R0, 0 ) );
+
+ x86_push( func, ecx );
x86_mov_reg_imm( func, ecx, (unsigned long) code );
x86_call( func, ecx );
+#ifndef WIN32
+ x86_pop(func, ecx );
+#endif
}
+
x86_pop_gp(
func );
@@ -577,11 +558,7 @@ static void XSTDCALL
cos4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
store[X + 0] = cosf( store[X + 0] );
store[X + 1] = cosf( store[X + 1] );
@@ -604,11 +581,8 @@ static void XSTDCALL
ex24f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = powf( 2.0f, store[X + 0] );
store[X + 1] = powf( 2.0f, store[X + 1] );
store[X + 2] = powf( 2.0f, store[X + 2] );
@@ -641,11 +615,8 @@ static void XSTDCALL
flr4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = floorf( store[X + 0] );
store[X + 1] = floorf( store[X + 1] );
store[X + 2] = floorf( store[X + 2] );
@@ -667,11 +638,8 @@ static void XSTDCALL
frc4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] -= floorf( store[X + 0] );
store[X + 1] -= floorf( store[X + 1] );
store[X + 2] -= floorf( store[X + 2] );
@@ -693,11 +661,8 @@ static void XSTDCALL
lg24f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = LOG2( store[X + 0] );
store[X + 1] = LOG2( store[X + 1] );
store[X + 2] = LOG2( store[X + 2] );
@@ -755,11 +720,8 @@ static void XSTDCALL
pow4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = powf( store[X + 0], store[X + 4] );
store[X + 1] = powf( store[X + 1], store[X + 5] );
store[X + 2] = powf( store[X + 2], store[X + 6] );
@@ -800,11 +762,8 @@ static void XSTDCALL
rsqrt4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = 1.0F / sqrtf( store[X + 0] );
store[X + 1] = 1.0F / sqrtf( store[X + 1] );
store[X + 2] = 1.0F / sqrtf( store[X + 2] );
@@ -878,11 +837,8 @@ static void XSTDCALL
sin4f(
float *store )
{
-#ifdef WIN32
const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
+
store[X + 0] = sinf( store[X + 0] );
store[X + 1] = sinf( store[X + 1] );
store[X + 2] = sinf( store[X + 2] );
@@ -1234,11 +1190,16 @@ emit_instruction(
switch( inst->Instruction.Opcode ) {
case TGSI_OPCODE_ARL:
+#if 0
+ /* XXX this isn't working properly (see glean vertProg1 test) */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
emit_f2it( func, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
+#else
+ return 0;
+#endif
break;
case TGSI_OPCODE_MOV:
@@ -2029,40 +1990,40 @@ emit_declaration(
}
}
-static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+static void aos_to_soa( struct x86_function *func,
+ uint arg_aos,
+ uint arg_soa,
+ uint arg_num,
+ uint arg_stride )
{
- struct x86_reg soa_input;
- struct x86_reg aos_input;
- struct x86_reg num_inputs;
- struct x86_reg temp;
- unsigned char *inner_loop;
+ struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
+ struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
+ struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
+ struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
+ int inner_loop;
- soa_input = x86_make_reg( file_REG32, reg_AX );
- aos_input = x86_make_reg( file_REG32, reg_BX );
- num_inputs = x86_make_reg( file_REG32, reg_CX );
- temp = x86_make_reg( file_REG32, reg_DX );
/* Save EBX */
x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
- x86_mov( func, soa_input, get_argument( soa + 1 ) );
- x86_mov( func, aos_input, get_argument( aos + 1 ) );
- x86_mov( func, num_inputs, get_argument( num + 1 ) );
+ x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) );
+ x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) );
+ x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
+ x86_mov( func, stride, x86_fn_arg( func, arg_stride ) );
/* do */
inner_loop = x86_get_label( func );
{
- x86_mov( func, temp, get_argument( stride + 1 ) );
x86_push( func, aos_input );
sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
- x86_add( func, aos_input, temp );
+ x86_add( func, aos_input, stride );
sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
- x86_add( func, aos_input, temp );
+ x86_add( func, aos_input, stride );
sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
- x86_add( func, aos_input, temp );
+ x86_add( func, aos_input, stride );
sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
x86_pop( func, aos_input );
@@ -2088,7 +2049,7 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num,
x86_jcc( func, cc_NE, inner_loop );
/* Restore EBX */
- x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
+ x86_pop( func, aos_input );
}
static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
@@ -2097,7 +2058,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
struct x86_reg aos_output;
struct x86_reg num_outputs;
struct x86_reg temp;
- unsigned char *inner_loop;
+ int inner_loop;
soa_output = x86_make_reg( file_REG32, reg_AX );
aos_output = x86_make_reg( file_REG32, reg_BX );
@@ -2105,11 +2066,11 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
temp = x86_make_reg( file_REG32, reg_DX );
/* Save EBX */
- x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+ x86_push( func, aos_output );
- x86_mov( func, soa_output, get_argument( soa + 1 ) );
- x86_mov( func, aos_output, get_argument( aos + 1 ) );
- x86_mov( func, num_outputs, get_argument( num + 1 ) );
+ x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
+ x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
+ x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
/* do */
inner_loop = x86_get_label( func );
@@ -2126,7 +2087,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
- x86_mov( func, temp, get_argument( stride + 1 ) );
+ x86_mov( func, temp, x86_fn_arg( func, stride ) );
x86_push( func, aos_output );
sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
@@ -2150,7 +2111,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
x86_jcc( func, cc_NE, inner_loop );
/* Restore EBX */
- x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
+ x86_pop( func, aos_output );
}
/**
@@ -2185,6 +2146,17 @@ tgsi_emit_sse2(
tgsi_parse_init( &parse, tokens );
+ /* Can't just use EDI, EBX without save/restoring them:
+ */
+ x86_push(
+ func,
+ get_immediate_base() );
+
+ x86_push(
+ func,
+ get_temp_base() );
+
+
/*
* Different function args for vertex/fragment shaders:
*/
@@ -2193,51 +2165,55 @@ tgsi_emit_sse2(
x86_mov(
func,
get_input_base(),
- get_argument( 0 ) );
+ x86_fn_arg( func, 1 ) );
/* skipping outputs argument here */
x86_mov(
func,
get_const_base(),
- get_argument( 2 ) );
+ x86_fn_arg( func, 3 ) );
x86_mov(
func,
get_temp_base(),
- get_argument( 3 ) );
+ x86_fn_arg( func, 4 ) );
x86_mov(
func,
get_coef_base(),
- get_argument( 4 ) );
+ x86_fn_arg( func, 5 ) );
x86_mov(
func,
get_immediate_base(),
- get_argument( 5 ) );
+ x86_fn_arg( func, 6 ) );
}
else {
assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
if (do_swizzles)
- aos_to_soa( func, 5, 0, 6, 7 );
+ aos_to_soa( func,
+ 6, /* aos_input */
+ 1, /* machine->input */
+ 7, /* num_inputs */
+ 8 ); /* input_stride */
x86_mov(
func,
get_input_base(),
- get_argument( 0 ) );
+ x86_fn_arg( func, 1 ) );
x86_mov(
func,
get_output_base(),
- get_argument( 1 ) );
+ x86_fn_arg( func, 2 ) );
x86_mov(
func,
get_const_base(),
- get_argument( 2 ) );
+ x86_fn_arg( func, 3 ) );
x86_mov(
func,
get_temp_base(),
- get_argument( 3 ) );
+ x86_fn_arg( func, 4 ) );
x86_mov(
func,
get_immediate_base(),
- get_argument( 4 ) );
+ x86_fn_arg( func, 5 ) );
}
while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
@@ -2260,7 +2236,7 @@ tgsi_emit_sse2(
x86_mov(
func,
get_output_base(),
- get_argument( 1 ) );
+ x86_fn_arg( func, 2 ) );
}
}
@@ -2307,9 +2283,19 @@ tgsi_emit_sse2(
if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
if (do_swizzles)
- soa_to_aos( func, 8, 1, 9, 10 );
+ soa_to_aos( func, 9, 2, 10, 11 );
}
+ /* Can't just use EBX, EDI without save/restoring them:
+ */
+ x86_pop(
+ func,
+ get_temp_base() );
+
+ x86_pop(
+ func,
+ get_immediate_base() );
+
#ifdef WIN32
emit_retw( func, 16 );
#else