summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x src/mesa/pipe/tgsi/exec/tgsi_sse2.c 2690
-rw-r--r-- src/mesa/x86/rtasm/x86sse.c 54
-rw-r--r-- src/mesa/x86/rtasm/x86sse.h 7
3 files changed, 1400 insertions, 1351 deletions
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
index 36d9b86f75..359775fdfb 100755
--- a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
@@ -1,7 +1,8 @@
#include "tgsi_platform.h"
#include "tgsi_core.h"
+#include "x86/rtasm/x86sse.h"
-#if defined USE_X86_ASM
+#if defined(USE_X86_ASM) || defined(SLANG_X86)
#define FOR_EACH_CHANNEL( CHAN )\
for( CHAN = 0; CHAN < 4; CHAN++ )
@@ -181,148 +182,257 @@ emit_tempf(
}
static void
-emit_temps (struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan)
+emit_temps (
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
{
- sse_movaps (func,
- get_temp (vec, chan),
- make_xmm (xmm));
+ sse_movaps(
+ func,
+ get_temp( vec, chan ),
+ make_xmm( xmm ) );
}
static void
-emit_addrf( struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
+emit_addrf(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
{
- emit_tempf( func, xmm, vec + TGSI_EXEC_NUM_TEMPS, chan );
+ emit_tempf(
+ func,
+ xmm,
+ vec + TGSI_EXEC_NUM_TEMPS,
+ chan );
}
static void
-emit_addrs( struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
+emit_addrs(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
{
- emit_temps( func, xmm, vec + TGSI_EXEC_NUM_TEMPS, chan );
+ emit_temps(
+ func,
+ xmm,
+ vec + TGSI_EXEC_NUM_TEMPS,
+ chan );
}
static void
-emit_abs (struct x86_function *func,
- unsigned xmm)
+emit_abs(
+ struct x86_function *func,
+ unsigned xmm )
{
- sse_andps (func,
- make_xmm (xmm),
- get_temp (TGSI_EXEC_TEMP_7FFFFFFF_I,
- TGSI_EXEC_TEMP_7FFFFFFF_C));
+ sse_andps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_7FFFFFFF_I,
+ TGSI_EXEC_TEMP_7FFFFFFF_C ) );
}
static void
-emit_neg (struct x86_function *func,
- unsigned xmm)
+emit_neg(
+ struct x86_function *func,
+ unsigned xmm )
{
- sse_xorps (func,
- make_xmm (xmm),
- get_temp (TGSI_EXEC_TEMP_80000000_I,
- TGSI_EXEC_TEMP_80000000_C));
+ sse_xorps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_80000000_I,
+ TGSI_EXEC_TEMP_80000000_C ) );
}
static void
-emit_setsign (struct x86_function *func,
- unsigned xmm)
+emit_setsign(
+ struct x86_function *func,
+ unsigned xmm )
{
- sse_orps (func,
- make_xmm (xmm),
- get_temp (TGSI_EXEC_TEMP_80000000_I,
- TGSI_EXEC_TEMP_80000000_C));
+ sse_orps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_80000000_I,
+ TGSI_EXEC_TEMP_80000000_C ) );
}
static void
-emit_add (struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src)
+emit_add(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
{
- sse_addps (func,
- make_xmm (xmm_dst),
- make_xmm (xmm_src));
+ sse_addps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
}
-static GLfloat g_cos_storage[4 + 3];
+static void
+emit_push_abcd(
+ struct x86_function *func )
+{
+ x86_push(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+ x86_push(
+ func,
+ x86_make_reg( file_REG32, reg_BX ) );
+ x86_push(
+ func,
+ x86_make_reg( file_REG32, reg_CX ) );
+ x86_push(
+ func,
+ x86_make_reg( file_REG32, reg_DX ) );
+}
static void
-cos4f (void)
+emit_pop_dcba(
+ struct x86_function *func )
{
- GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_cos_storage);
+ x86_pop(
+ func,
+ x86_make_reg( file_REG32, reg_DX ) );
+ x86_pop(
+ func,
+ x86_make_reg( file_REG32, reg_CX ) );
+ x86_pop(
+ func,
+ x86_make_reg( file_REG32, reg_BX ) );
+ x86_pop(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+}
+
+static void
+emit_func_call1(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned storage,
+ unsigned char *code )
+{
+ x86_push(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+ x86_mov_reg_imm(
+ func,
+ x86_make_reg( file_REG32, reg_AX ),
+ storage );
+ sse_movaps(
+ func,
+ x86_deref( x86_make_reg( file_REG32, reg_AX ) ),
+ make_xmm( xmm_dst ) );
+ emit_push_abcd(
+ func );
+ x86_call(
+ func,
+ code );
+ emit_pop_dcba(
+ func );
+ sse_movaps(
+ func,
+ make_xmm( xmm_dst ),
+ x86_deref( x86_make_reg( file_REG32, reg_AX ) ) );
+ x86_pop(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+}
+
+static void
+emit_func_call2(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src,
+ unsigned storage,
+ unsigned char *code )
+{
+ x86_push(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+ x86_mov_reg_imm(
+ func,
+ x86_make_reg( file_REG32, reg_AX ),
+ storage );
+ sse_movaps(
+ func,
+ x86_deref( x86_make_reg( file_REG32, reg_AX ) ),
+ make_xmm( xmm_dst ) );
+ sse_movaps(
+ func,
+ x86_make_disp( x86_make_reg( file_REG32, reg_AX ), 16 ),
+ make_xmm( xmm_src ) );
+ emit_push_abcd(
+ func );
+ x86_call(
+ func,
+ code );
+ emit_pop_dcba(
+ func );
+ sse_movaps(
+ func,
+ make_xmm( xmm_dst ),
+ x86_deref( x86_make_reg( file_REG32, reg_AX ) ) );
+ x86_pop(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+}
+
+/* XXX: move into machine context */
+static float g_cos_storage[4 + 3];
+
+static void
+cos4f( void )
+{
+ float *store = (float *) ALIGN16( (unsigned) g_cos_storage );
#ifdef WIN32
- store[0] = (GLfloat) cos ((GLdouble) store[0]);
- store[1] = (GLfloat) cos ((GLdouble) store[1]);
- store[2] = (GLfloat) cos ((GLdouble) store[2]);
- store[3] = (GLfloat) cos ((GLdouble) store[3]);
+ store[0] = (float) cos( (double) store[0] );
+ store[1] = (float) cos( (double) store[1] );
+ store[2] = (float) cos( (double) store[2] );
+ store[3] = (float) cos( (double) store[3] );
#else
- store[0] = cosf (store[0]);
- store[1] = cosf (store[1]);
- store[2] = cosf (store[2]);
- store[3] = cosf (store[3]);
+ store[0] = cosf( store[0] );
+ store[1] = cosf( store[1] );
+ store[2] = cosf( store[2] );
+ store[3] = cosf( store[3] );
#endif
}
static void
-emit_cos (struct x86_function *func,
- unsigned xmm_dst)
+emit_cos(
+ struct x86_function *func,
+ unsigned xmm_dst )
{
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_mov_reg_imm (func,
- x86_make_reg (file_REG32, reg_AX),
- ALIGN16((GLint) g_cos_storage));
- sse_movaps (func,
- x86_deref (x86_make_reg (file_REG32, reg_AX)),
- make_xmm (xmm_dst));
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_call (func,
- (GLubyte *) cos4f);
- x86_pop (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
- sse_movaps (func,
- make_xmm (xmm_dst),
- x86_deref (x86_make_reg (file_REG32, reg_AX)));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
+ emit_func_call1(
+ func,
+ xmm_dst,
+ ALIGN16( (unsigned) g_cos_storage ),
+ (unsigned char *) cos4f );
}
-static GLfloat g_sin_storage[4 + 3];
+/* XXX: move into machine context */
+static float g_sin_storage[4 + 3];
static void
-sin4f (void)
+sin4f( void )
{
- GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_sin_storage);
+ float *store = (float *) ALIGN16( (unsigned) g_sin_storage );
#ifdef WIN32
- store[0] = (GLfloat) sin ((GLdouble) store[0]);
- store[1] = (GLfloat) sin ((GLdouble) store[1]);
- store[2] = (GLfloat) sin ((GLdouble) store[2]);
- store[3] = (GLfloat) sin ((GLdouble) store[3]);
+ store[0] = (float) sin( (double) store[0] );
+ store[1] = (float) sin( (double) store[1] );
+ store[2] = (float) sin( (double) store[2] );
+ store[3] = (float) sin( (double) store[3] );
#else
- store[0] = sin (store[0]);
- store[1] = sin (store[1]);
- store[2] = sin (store[2]);
- store[3] = sin (store[3]);
+ store[0] = sinf( store[0] );
+ store[1] = sinf( store[1] );
+ store[2] = sinf( store[2] );
+ store[3] = sinf( store[3] );
#endif
}
@@ -330,47 +440,23 @@ static void
emit_sin (struct x86_function *func,
unsigned xmm_dst)
{
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_mov_reg_imm (func,
- x86_make_reg (file_REG32, reg_AX),
- ALIGN16((GLint) g_sin_storage));
- sse_movaps (func,
- x86_deref (x86_make_reg (file_REG32, reg_AX)),
- make_xmm (xmm_dst));
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_call (func,
- (GLubyte *) sin4f);
- x86_pop (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
- sse_movaps (func,
- make_xmm (xmm_dst),
- x86_deref (x86_make_reg (file_REG32, reg_AX)));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
+ emit_func_call1(
+ func,
+ xmm_dst,
+ ALIGN16( (unsigned) g_sin_storage ),
+ (unsigned char *) sin4f );
}
static void
-emit_mov (struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src)
+emit_mov(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
{
- sse_movups (func,
- make_xmm (xmm_dst),
- make_xmm (xmm_src));
+ sse_movups(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
}
static void
@@ -378,1353 +464,1255 @@ emit_mul (struct x86_function *func,
unsigned xmm_dst,
unsigned xmm_src)
{
- sse_mulps (func,
- make_xmm (xmm_dst),
- make_xmm (xmm_src));
+ sse_mulps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
}
-static GLfloat g_pow_storage[4 + 4 + 3];
+/* XXX: move into machine context */
+static float g_pow_storage[4 + 4 + 3];
static void
-pow4f (void)
+pow4f( void )
{
- GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_pow_storage);
+ float *store = (float *) ALIGN16( (unsigned) g_pow_storage );
#ifdef WIN32
- store[0] = (GLfloat) pow ((GLdouble) store[0], (GLdouble) store[4]);
- store[1] = (GLfloat) pow ((GLdouble) store[1], (GLdouble) store[5]);
- store[2] = (GLfloat) pow ((GLdouble) store[2], (GLdouble) store[6]);
- store[3] = (GLfloat) pow ((GLdouble) store[3], (GLdouble) store[7]);
+ store[0] = (float) pow( (double) store[0], (double) store[4] );
+ store[1] = (float) pow( (double) store[1], (double) store[5] );
+ store[2] = (float) pow( (double) store[2], (double) store[6] );
+ store[3] = (float) pow( (double) store[3], (double) store[7] );
#else
- store[0] = powf (store[0], store[4]);
- store[1] = powf (store[1], store[5]);
- store[2] = powf (store[2], store[6]);
- store[3] = powf (store[3], store[7]);
+ store[0] = powf( store[0], store[4] );
+ store[1] = powf( store[1], store[5] );
+ store[2] = powf( store[2], store[6] );
+ store[3] = powf( store[3], store[7] );
#endif
}
static void
-emit_pow (struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src)
+emit_pow(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
{
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_mov_reg_imm (func,
- x86_make_reg (file_REG32, reg_AX),
- ALIGN16((GLint) g_pow_storage));
- sse_movaps (func,
- x86_make_disp (x86_make_reg (file_REG32, reg_AX), 0),
- make_xmm (xmm_dst));
- sse_movaps (func,
- x86_make_disp (x86_make_reg (file_REG32, reg_AX), 16),
- make_xmm (xmm_src));
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_call (func,
- (GLubyte *) pow4f);
- x86_pop (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
- sse_movaps (func,
- make_xmm (xmm_dst),
- x86_deref (x86_make_reg (file_REG32, reg_AX)));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
+ emit_func_call2(
+ func,
+ xmm_dst,
+ xmm_src,
+ ALIGN16( (unsigned) g_pow_storage ),
+ (unsigned char *) pow4f );
}
-static GLfloat g_ex2_storage[4 + 3];
+/* XXX: move into machine context */
+static float g_ex2_storage[4 + 3];
static void
-ex24f (void)
+ex24f( void )
{
- GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_ex2_storage);
+ float *store = (float *) ALIGN16( (unsigned) g_ex2_storage );
- store[0] = (GLfloat) pow (2.0, (GLdouble) store[0]);
- store[1] = (GLfloat) pow (2.0, (GLdouble) store[1]);
- store[2] = (GLfloat) pow (2.0, (GLdouble) store[2]);
- store[3] = (GLfloat) pow (2.0, (GLdouble) store[3]);
+#ifdef WIN32
+ store[0] = (float) pow( 2.0, (double) store[0] );
+ store[1] = (float) pow( 2.0, (double) store[1] );
+ store[2] = (float) pow( 2.0, (double) store[2] );
+ store[3] = (float) pow( 2.0, (double) store[3] );
+#else
+ store[0] = powf( 2.0f, store[0] );
+ store[1] = powf( 2.0f, store[1] );
+ store[2] = powf( 2.0f, store[2] );
+ store[3] = powf( 2.0f, store[3] );
+#endif
}
static void
-emit_ex2 (struct x86_function *func,
- unsigned xmm_dst)
+emit_ex2(
+ struct x86_function *func,
+ unsigned xmm_dst )
{
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_mov_reg_imm (func,
- x86_make_reg (file_REG32, reg_AX),
- ALIGN16((GLint) g_ex2_storage));
- sse_movaps (func,
- x86_deref (x86_make_reg (file_REG32, reg_AX)),
- make_xmm (xmm_dst));
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_call (func,
- (GLubyte *) ex24f);
- x86_pop (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
- sse_movaps (func,
- make_xmm (xmm_dst),
- x86_deref (x86_make_reg (file_REG32, reg_AX)));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
+ emit_func_call1(
+ func,
+ xmm_dst,
+ ALIGN16( (unsigned) g_ex2_storage ),
+ (unsigned char *) ex24f );
}
-static GLfloat g_lg2_storage[4 + 3];
+/* XXX: move into machine context */
+static float g_lg2_storage[4 + 3];
static void
-lg24f (void)
+lg24f( void )
{
- GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_lg2_storage);
+ float *store = (float *) ALIGN16( (unsigned) g_lg2_storage );
- store[0] = LOG2 (store[0]);
- store[1] = LOG2 (store[1]);
- store[2] = LOG2 (store[2]);
- store[3] = LOG2 (store[3]);
+ store[0] = LOG2( store[0] );
+ store[1] = LOG2( store[1] );
+ store[2] = LOG2( store[2] );
+ store[3] = LOG2( store[3] );
}
static void
-emit_lg2 (struct x86_function *func,
- unsigned xmm_dst)
+emit_lg2(
+ struct x86_function *func,
+ unsigned xmm_dst )
{
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_mov_reg_imm (func,
- x86_make_reg (file_REG32, reg_AX),
- ALIGN16((GLint) g_lg2_storage));
- sse_movaps (func,
- x86_deref (x86_make_reg (file_REG32, reg_AX)),
- make_xmm (xmm_dst));
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_call (func,
- (GLubyte *) lg24f);
- x86_pop (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
- sse_movaps (func,
- make_xmm (xmm_dst),
- x86_deref (x86_make_reg (file_REG32, reg_AX)));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
+ emit_func_call1(
+ func,
+ xmm_dst,
+ ALIGN16( (unsigned) g_lg2_storage ),
+ (unsigned char *) lg24f );
}
-static GLfloat g_flr_storage[4 + 3];
+/* XXX: move into machine context */
+static float g_flr_storage[4 + 3];
static void
-flr4f (void)
+flr4f( void )
{
- GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_flr_storage);
+ float *store = (float *) ALIGN16( (unsigned) g_flr_storage );
- store[0] = (GLfloat) floor ((GLdouble) store[0]);
- store[1] = (GLfloat) floor ((GLdouble) store[1]);
- store[2] = (GLfloat) floor ((GLdouble) store[2]);
- store[3] = (GLfloat) floor ((GLdouble) store[3]);
+ store[0] = (float) floor( (double) store[0] );
+ store[1] = (float) floor( (double) store[1] );
+ store[2] = (float) floor( (double) store[2] );
+ store[3] = (float) floor( (double) store[3] );
}
static void
-emit_flr (struct x86_function *func,
- unsigned xmm_dst)
+emit_flr(
+ struct x86_function *func,
+ unsigned xmm_dst )
{
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_mov_reg_imm (func,
- x86_make_reg (file_REG32, reg_AX),
- ALIGN16((GLint) g_flr_storage));
- sse_movaps (func,
- x86_deref (x86_make_reg (file_REG32, reg_AX)),
- make_xmm (xmm_dst));
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_call (func,
- (GLubyte *) flr4f);
- x86_pop (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
- sse_movaps (func,
- make_xmm (xmm_dst),
- x86_deref (x86_make_reg (file_REG32, reg_AX)));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
+ emit_func_call1(
+ func,
+ xmm_dst,
+ ALIGN16( (unsigned) g_flr_storage ),
+ (unsigned char *) flr4f );
}
-static GLfloat g_frc_storage[4 + 3];
+/* XXX: move into machine context */
+static float g_frc_storage[4 + 3];
static void
-frc4f (void)
+frc4f( void )
{
- GLfloat *store = (GLfloat *) ALIGN16((unsigned) g_frc_storage);
+ float *store = (float *) ALIGN16( (unsigned) g_frc_storage );
- store[0] -= (GLfloat) floor ((GLdouble) store[0]);
- store[1] -= (GLfloat) floor ((GLdouble) store[1]);
- store[2] -= (GLfloat) floor ((GLdouble) store[2]);
- store[3] -= (GLfloat) floor ((GLdouble) store[3]);
+ store[0] -= (float) floor( (double) store[0] );
+ store[1] -= (float) floor( (double) store[1] );
+ store[2] -= (float) floor( (double) store[2] );
+ store[3] -= (float) floor( (double) store[3] );
}
static void
-emit_frc (struct x86_function *func,
- unsigned xmm_dst)
+emit_frc(
+ struct x86_function *func,
+ unsigned xmm_dst )
{
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_mov_reg_imm (func,
- x86_make_reg (file_REG32, reg_AX),
- ALIGN16((GLint) g_frc_storage));
- sse_movaps (func,
- x86_deref (x86_make_reg (file_REG32, reg_AX)),
- make_xmm (xmm_dst));
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_call (func,
- (GLubyte *) frc4f);
- x86_pop (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_CX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_BX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
- sse_movaps (func,
- make_xmm (xmm_dst),
- x86_deref (x86_make_reg (file_REG32, reg_AX)));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
+ emit_func_call1(
+ func,
+ xmm_dst,
+ ALIGN16( (unsigned) g_frc_storage ),
+ (unsigned char *) frc4f );
}
static void
-emit_rcp (struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src)
+emit_rcp (
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
{
- sse2_rcpps (func,
- make_xmm (xmm_dst),
- make_xmm (xmm_src));
+ sse2_rcpps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
}
static void
-emit_rsqrt (struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src)
+emit_rsqrt(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
{
- sse_rsqrtps (func,
- make_xmm (xmm_dst),
- make_xmm (xmm_src));
+ sse_rsqrtps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
}
static void
-emit_sub (struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src)
+emit_sub(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
{
- sse_subps (func,
- make_xmm (xmm_dst),
- make_xmm (xmm_src));
+ sse_subps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
}
static void
-emit_fetch (struct x86_function *func,
- unsigned xmm,
- const struct tgsi_full_src_register *reg,
- const unsigned chan_index)
+emit_fetch(
+ struct x86_function *func,
+ unsigned xmm,
+ const struct tgsi_full_src_register *reg,
+ const unsigned chan_index )
{
- unsigned swizzle = tgsi_util_get_full_src_register_extswizzle (reg, chan_index);
-
- switch (swizzle)
- {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
- switch (reg->SrcRegister.File)
- {
- case TGSI_FILE_CONSTANT:
- emit_const (func, xmm, reg->SrcRegister.Index, swizzle);
- break;
-
- case TGSI_FILE_INPUT:
- emit_input (func, xmm, reg->SrcRegister.Index, swizzle);
- break;
-
- case TGSI_FILE_TEMPORARY:
- emit_tempf (func, xmm, reg->SrcRegister.Index, swizzle);
- break;
-
- default:
- assert (0);
- }
- break;
-
- case TGSI_EXTSWIZZLE_ZERO:
- emit_tempf (func,
- xmm,
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C);
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- emit_tempf (func,
- xmm,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C);
- break;
-
- default:
- assert (0);
- }
-
- switch (tgsi_util_get_full_src_register_sign_mode (reg, chan_index))
- {
- case TGSI_UTIL_SIGN_CLEAR:
- emit_abs (func, xmm);
- break;
-
- case TGSI_UTIL_SIGN_SET:
- emit_setsign (func, xmm);
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- emit_neg (func, xmm);
- break;
-
- case TGSI_UTIL_SIGN_KEEP:
- break;
- }
+ unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+
+ switch( swizzle ) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ switch( reg->SrcRegister.File ) {
+ case TGSI_FILE_CONSTANT:
+ emit_const(
+ func,
+ xmm,
+ reg->SrcRegister.Index,
+ swizzle );
+ break;
+
+ case TGSI_FILE_INPUT:
+ emit_input(
+ func,
+ xmm,
+ reg->SrcRegister.Index,
+ swizzle );
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ emit_tempf(
+ func,
+ xmm,
+ reg->SrcRegister.Index,
+ swizzle );
+ break;
+
+ default:
+ assert( 0 );
+ }
+ break;
+
+ case TGSI_EXTSWIZZLE_ZERO:
+ emit_tempf(
+ func,
+ xmm,
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C );
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ emit_tempf(
+ func,
+ xmm,
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C );
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
+ case TGSI_UTIL_SIGN_CLEAR:
+ emit_abs( func, xmm );
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ emit_setsign( func, xmm );
+ break;
+
+ case TGSI_UTIL_SIGN_TOGGLE:
+ emit_neg( func, xmm );
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ }
}
static void
-emit_store (struct x86_function *func,
- unsigned xmm,
- const struct tgsi_full_dst_register *reg,
- const struct tgsi_full_instruction *inst,
- unsigned chan_index)
+emit_store(
+ struct x86_function *func,
+ unsigned xmm,
+ const struct tgsi_full_dst_register *reg,
+ const struct tgsi_full_instruction *inst,
+ unsigned chan_index )
{
- switch (reg->DstRegister.File)
- {
- case TGSI_FILE_OUTPUT:
- emit_output (func, xmm, reg->DstRegister.Index, chan_index);
- break;
+ switch( reg->DstRegister.File ) {
+ case TGSI_FILE_OUTPUT:
+ emit_output(
+ func,
+ xmm,
+ reg->DstRegister.Index,
+ chan_index );
+ break;
- case TGSI_FILE_TEMPORARY:
- emit_temps (func, xmm, reg->DstRegister.Index, chan_index);
- break;
+ case TGSI_FILE_TEMPORARY:
+ emit_temps(
+ func,
+ xmm,
+ reg->DstRegister.Index,
+ chan_index );
+ break;
case TGSI_FILE_ADDRESS:
- emit_addrs( func, xmm, reg->DstRegister.Index, chan_index );
+ emit_addrs(
+ func,
+ xmm,
+ reg->DstRegister.Index,
+ chan_index );
break;
- default:
- assert (0);
- }
+ default:
+ assert( 0 );
+ }
- switch (inst->Instruction.Saturate)
- {
- case TGSI_SAT_NONE:
- break;
+ switch( inst->Instruction.Saturate ) {
+ case TGSI_SAT_NONE:
+ break;
- case TGSI_SAT_ZERO_ONE:
-// assert (0);
- break;
+ case TGSI_SAT_ZERO_ONE:
+// XXX: clamping to [0,1] is not implemented; assert( 0 ) is disabled, so the value is stored unsaturated
+ break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- assert (0);
- break;
- }
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ assert( 0 );
+ break;
+ }
}
-#define FETCH(XMM,INDEX,CHAN)\
- emit_fetch (func, XMM, &inst->FullSrcRegisters[INDEX], CHAN)
-
-#define STORE(XMM,INDEX,CHAN)\
- emit_store (func, XMM, &inst->FullDstRegisters[INDEX], inst, CHAN)
-
static void
-emit_kil (struct x86_function *func,
- const struct tgsi_full_src_register *reg)
+emit_kil(
+ struct x86_function *func,
+ const struct tgsi_full_src_register *reg )
{
- unsigned uniquemask;
- unsigned registers[4];
- unsigned nextregister = 0;
- unsigned firstchan = ~0;
- unsigned chan_index;
-
- /* This mask stores component bits that were already tested. Note that
- * we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
- uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
-
- for (chan_index = 0; chan_index < 4; chan_index++)
- {
- unsigned swizzle;
-
- /* unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle (reg, chan_index);
-
- /* check if the component has not been already tested */
- if (!(uniquemask & (1 << swizzle)))
- {
- uniquemask |= 1 << swizzle;
-
- /* allocate register */
- registers[chan_index] = nextregister;
- emit_fetch (func, nextregister, reg, chan_index);
- nextregister++;
-
- /* mark the first channel used */
- if (firstchan == ~0)
- firstchan = chan_index;
- }
- }
-
- x86_push (func,
- x86_make_reg (file_REG32, reg_AX));
- x86_push (func,
- x86_make_reg (file_REG32, reg_DX));
-
- for (chan_index = 0; chan_index < 4; chan_index++)
- {
- if (uniquemask & (1 << chan_index))
- {
- sse_cmpps (func,
- make_xmm (registers[chan_index]),
- get_temp (TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C),
- cc_LessThan);
-
- if (chan_index == firstchan)
- {
- sse_pmovmskb (func,
- x86_make_reg (file_REG32, reg_AX),
- make_xmm (registers[chan_index]));
- }
- else
- {
- sse_pmovmskb (func,
- x86_make_reg (file_REG32, reg_DX),
- make_xmm (registers[chan_index]));
- x86_or (func,
- x86_make_reg (file_REG32, reg_AX),
- x86_make_reg (file_REG32, reg_DX));
- }
- }
- }
-
- x86_or (func,
- get_temp(TGSI_EXEC_TEMP_KILMASK_I,
- TGSI_EXEC_TEMP_KILMASK_C),
- x86_make_reg (file_REG32, reg_AX));
-
- x86_pop (func,
- x86_make_reg (file_REG32, reg_DX));
- x86_pop (func,
- x86_make_reg (file_REG32, reg_AX));
-}
+ unsigned uniquemask;
+ unsigned registers[4];
+ unsigned nextregister = 0;
+ unsigned firstchan = ~0;
+ unsigned chan_index;
+
+ /* This mask stores the swizzle component bits that were already tested. We
+ * test whether the value is less than zero, so the constants 1.0 and 0.0 need
+ * not be tested. NOTE(review): bits are per swizzle, registers[] per channel - verify. */
+ uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+ FOR_EACH_CHANNEL( chan_index ) {
+ unsigned swizzle;
+
+ /* unswizzle channel */
+ swizzle = tgsi_util_get_full_src_register_extswizzle(
+ reg,
+ chan_index );
+
+ /* check if the component has not been already tested */
+ if( !(uniquemask & (1 << swizzle)) ) {
+ uniquemask |= 1 << swizzle;
+
+ /* allocate register */
+ registers[chan_index] = nextregister;
+ emit_fetch(
+ func,
+ nextregister,
+ reg,
+ chan_index );
+ nextregister++;
+
+ /* mark the first channel used */
+ if( firstchan == ~0 ) {
+ firstchan = chan_index;
+ }
+ }
+ }
-static void
-emit_setcc (struct x86_function *func,
- struct tgsi_full_instruction *inst,
- enum sse_cc cc)
-{
- unsigned chan_index;
+ x86_push(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+ x86_push(
+ func,
+ x86_make_reg( file_REG32, reg_DX ) );
+
+ FOR_EACH_CHANNEL( chan_index ) {
+ if( uniquemask & (1 << chan_index) ) {
+ sse_cmpps(
+ func,
+ make_xmm( registers[chan_index] ),
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ),
+ cc_LessThan );
+
+ if( chan_index == firstchan ) {
+ sse_pmovmskb(
+ func,
+ x86_make_reg( file_REG32, reg_AX ),
+ make_xmm( registers[chan_index] ) );
+ }
+ else {
+ sse_pmovmskb(
+ func,
+ x86_make_reg( file_REG32, reg_DX ),
+ make_xmm( registers[chan_index] ) );
+ x86_or(
+ func,
+ x86_make_reg( file_REG32, reg_AX ),
+ x86_make_reg( file_REG32, reg_DX ) );
+ }
+ }
+ }
+
+ x86_or(
+ func,
+ get_temp(
+ TGSI_EXEC_TEMP_KILMASK_I,
+ TGSI_EXEC_TEMP_KILMASK_C ),
+ x86_make_reg( file_REG32, reg_AX ) );
- FOR_EACH_ENABLED_CHANNEL
- {
- emit_fetch (func, 0, &inst->FullSrcRegisters[0], chan_index);
- emit_fetch (func, 1, &inst->FullSrcRegisters[1], chan_index);
+ x86_pop(
+ func,
+ x86_make_reg( file_REG32, reg_DX ) );
+ x86_pop(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+}
- sse_cmpps (func,
- make_xmm (0),
- make_xmm (1),
- cc);
+#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
+ emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
- sse_andps (func,
- make_xmm (0),
- get_temp (TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C));
+#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
+ emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
- emit_store (func, 0, &inst->FullDstRegisters[0], inst, chan_index);
- }
+static void
+emit_setcc(
+ struct x86_function *func,
+ struct tgsi_full_instruction *inst,
+ enum sse_cc cc )
+{
+ unsigned chan_index;
+
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ sse_cmpps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 1 ),
+ cc );
+ sse_andps(
+ func,
+ make_xmm( 0 ),
+ get_temp(
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C ) );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
}
static void
-emit_cmp (struct x86_function *func,
- struct tgsi_full_instruction *inst)
+emit_cmp(
+ struct x86_function *func,
+ struct tgsi_full_instruction *inst )
{
- unsigned chan_index;
-
- FOR_EACH_ENABLED_CHANNEL
- {
- emit_fetch (func, 0, &inst->FullSrcRegisters[0], chan_index);
- emit_fetch (func, 1, &inst->FullSrcRegisters[1], chan_index);
- emit_fetch (func, 2, &inst->FullSrcRegisters[2], chan_index);
-
- sse_cmpps (func,
- make_xmm (0),
- get_temp (TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C),
- cc_LessThan);
-
- sse_andps (func,
- make_xmm (1),
- make_xmm (0));
- sse_andnps (func,
- make_xmm (0),
- make_xmm (2));
- sse_orps (func,
- make_xmm (0),
- make_xmm (1));
-
- emit_store (func, 0, &inst->FullDstRegisters[0], inst, chan_index);
- }
+ unsigned chan_index;
+
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ FETCH( func, *inst, 2, 2, chan_index );
+ sse_cmpps(
+ func,
+ make_xmm( 0 ),
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ),
+ cc_LessThan );
+ sse_andps(
+ func,
+ make_xmm( 1 ),
+ make_xmm( 0 ) );
+ sse_andnps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 2 ) );
+ sse_orps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 1 ) );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
}
static void
-emit_f2it( struct x86_function *func,
- unsigned xmm )
+emit_f2it(
+ struct x86_function *func,
+ unsigned xmm )
{
- sse2_cvttps2dq( func, make_xmm( xmm ), make_xmm( xmm ) );
+ sse2_cvttps2dq(
+ func,
+ make_xmm( xmm ),
+ make_xmm( xmm ) );
}
static void
-emit_instruction (struct x86_function *func,
- struct tgsi_full_instruction *inst)
+emit_instruction(
+ struct x86_function *func,
+ struct tgsi_full_instruction *inst )
{
- unsigned chan_index;
+ unsigned chan_index;
- switch (inst->Instruction.Opcode) {
+ switch( inst->Instruction.Opcode ) {
case TGSI_OPCODE_ARL:
- FOR_EACH_ENABLED_CHANNEL {
- FETCH( 0, 0, chan_index );
- emit_f2it( func, 0 );
- STORE( 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ emit_f2it( func, 0 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MOV:
+ /* TGSI_OPCODE_SWZ */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LIT: /* Lighting coefficients:
+   *   dst.x = 1, dst.y = max(src0.x, 0),
+   *   dst.z = (src0.x > 0) ? pow(max(src0.y, 0), clamp(src0.w, -128, 128)) : 0,
+   *   dst.w = 1.
+   * Only the enabled destination channels are computed and stored. */
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+ emit_tempf( /* xmm0 = constant from the ONE temp slot, shared by X and W */
+ func,
+ 0,
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C);
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+ STORE( func, *inst, 0, 0, CHAN_X );
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+ FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src0.x */
+ sse_maxps( /* clamp below at the zero constant */
+ func,
+ make_xmm( 0 ),
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ) );
+ STORE( func, *inst, 0, 0, CHAN_Y );
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src0.y (pow base) */
+ sse_maxps( /* base = max(src0.y, 0) */
+ func,
+ make_xmm( 1 ),
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ) );
+ FETCH( func, *inst, 2, 0, CHAN_W ); /* xmm2 = src0.w (pow exponent) */
+ sse_minps( /* exponent = min(exponent, 128 constant) */
+ func,
+ make_xmm( 2 ),
+ get_temp(
+ TGSI_EXEC_TEMP_128_I,
+ TGSI_EXEC_TEMP_128_C ) );
+ sse_maxps( /* exponent = max(exponent, -128 constant) */
+ func,
+ make_xmm( 2 ),
+ get_temp(
+ TGSI_EXEC_TEMP_MINUS_128_I,
+ TGSI_EXEC_TEMP_MINUS_128_C ) );
+ emit_pow( func, 1, 2 ); /* presumably xmm1 = pow(xmm1, xmm2) -- confirm against emit_pow */
+ FETCH( func, *inst, 0, 0, CHAN_X ); /* reload src0.x for the sign test */
+ sse_xorps( /* xmm2 = 0 */
+ func,
+ make_xmm( 2 ),
+ make_xmm( 2 ) );
+ sse_cmpps( /* xmm2 = mask of (0 < src0.x); strict, so x == 0 selects 0, not pow() */
+ func,
+ make_xmm( 2 ),
+ make_xmm( 0 ),
+ cc_LessThan );
+ sse_andps( /* keep the pow result only where the mask is set */
+ func,
+ make_xmm( 2 ),
+ make_xmm( 1 ) );
+ STORE( func, *inst, 2, 0, CHAN_Z );
+ }
+ }
+ break;
+
+ case TGSI_OPCODE_RCP:
+ /* TGSI_OPCODE_RECIP */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_rcp( func, 0, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ /* TGSI_OPCODE_RECIPSQRT */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_rsqrt( func, 0, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_EXP:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_LOG:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_MUL:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ emit_mul( func, 0, 1 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_ADD:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ emit_add( func, 0, 1 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DP3:
+ /* TGSI_OPCODE_DOT3 */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ FETCH( func, *inst, 1, 1, CHAN_X );
+ emit_mul( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Y );
+ FETCH( func, *inst, 2, 1, CHAN_Y );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Z );
+ FETCH( func, *inst, 2, 1, CHAN_Z );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DP4:
+ /* TGSI_OPCODE_DOT4 */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ FETCH( func, *inst, 1, 1, CHAN_X );
+ emit_mul( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Y );
+ FETCH( func, *inst, 2, 1, CHAN_Y );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Z );
+ FETCH( func, *inst, 2, 1, CHAN_Z );
+ emit_mul(func, 1, 2 );
+ emit_add(func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_W );
+ FETCH( func, *inst, 2, 1, CHAN_W );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DST:
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+ emit_tempf(
+ func,
+ 0,
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C );
+ STORE( func, *inst, 0, 0, CHAN_X );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+ FETCH( func, *inst, 0, 0, CHAN_Y );
+ FETCH( func, *inst, 1, 1, CHAN_Y );
+ emit_mul( func, 0, 1 );
+ STORE( func, *inst, 0, 0, CHAN_Y );
}
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+ FETCH( func, *inst, 0, 0, CHAN_Z );
+ STORE( func, *inst, 0, 0, CHAN_Z );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+ FETCH( func, *inst, 0, 1, CHAN_W );
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_MIN:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ sse_minps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 1 ) );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MAX:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ sse_maxps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 1 ) );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SLT:
+ /* TGSI_OPCODE_SETLT */
+ emit_setcc( func, inst, cc_LessThan );
+ break;
+
+ case TGSI_OPCODE_SGE:
+ /* TGSI_OPCODE_SETGE */
+ emit_setcc( func, inst, cc_NotLessThan );
+ break;
+
+ case TGSI_OPCODE_MAD:
+ /* TGSI_OPCODE_MADD */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ FETCH( func, *inst, 2, 2, chan_index );
+ emit_mul( func, 0, 1 );
+ emit_add( func, 0, 2 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SUB:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ emit_sub( func, 0, 1 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LERP:
+ /* TGSI_OPCODE_LRP */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ FETCH( func, *inst, 2, 2, chan_index );
+ emit_sub( func, 1, 2 );
+ emit_mul( func, 0, 1 );
+ emit_add( func, 0, 2 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CND:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_CND0:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_DOT2ADD:
+ /* TGSI_OPCODE_DP2A */
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_INDEX:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_NEGATE:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_FRAC:
+ /* TGSI_OPCODE_FRC */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ emit_frc( func, 0 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CLAMP:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_FLOOR:
+ /* TGSI_OPCODE_FLR */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ emit_flr( func, 0 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_ROUND:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_EXPBASE2:
+ /* TGSI_OPCODE_EX2 */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_ex2( func, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LOGBASE2:
+ /* TGSI_OPCODE_LG2 */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_lg2( func, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_POWER:
+ /* TGSI_OPCODE_POW */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ FETCH( func, *inst, 1, 1, CHAN_X );
+ emit_pow( func, 0, 1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CROSSPRODUCT: /* Cross product of src0 and src1:
+   *   dst.x = src0.y * src1.z - src0.z * src1.y
+   *   dst.y = src0.z * src1.x - src0.x * src1.z
+   *   dst.z = src0.x * src1.y - src0.y * src1.x
+   *   dst.w = constant from the ONE temp slot
+   * Operands are fetched only when an enabled channel needs them.
+   * NOTE(review): dst.x is stored before src0.x/src1.x are fetched; if the
+   * destination can alias a source register this ordering matters -- confirm. */
+ /* TGSI_OPCODE_XPD */
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+ FETCH( func, *inst, 1, 1, CHAN_Z ); /* xmm1 = src1.z */
+ FETCH( func, *inst, 3, 0, CHAN_Z ); /* xmm3 = src0.z */
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ FETCH( func, *inst, 0, 0, CHAN_Y ); /* xmm0 = src0.y */
+ FETCH( func, *inst, 4, 1, CHAN_Y ); /* xmm4 = src1.y */
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+ emit_mov( func, 2, 0 ); /* work on copies: xmm0/xmm3 are reused for Y and Z */
+ emit_mul( func, 2, 1 );
+ emit_mov( func, 5, 3 );
+ emit_mul( func, 5, 4 );
+ emit_sub( func, 2, 5 );
+ STORE( func, *inst, 2, 0, CHAN_X );
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ FETCH( func, *inst, 2, 1, CHAN_X ); /* xmm2 = src1.x */
+ FETCH( func, *inst, 5, 0, CHAN_X ); /* xmm5 = src0.x */
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+ emit_mul( func, 3, 2 );
+ emit_mul( func, 1, 5 );
+ emit_sub( func, 3, 1 );
+ STORE( func, *inst, 3, 0, CHAN_Y );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+ emit_mul( func, 5, 4 );
+ emit_mul( func, 0, 2 );
+ emit_sub( func, 5, 0 );
+ STORE( func, *inst, 5, 0, CHAN_Z );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+ emit_tempf( func, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C ); /* load the constant temp; FETCH would treat the temp index as a source-operand index */
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_MULTIPLYMATRIX:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_ABS:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ emit_abs( func, 0) ;
+
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_RCC:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_DPH:
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ FETCH( func, *inst, 1, 1, CHAN_X );
+ emit_mul( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Y );
+ FETCH( func, *inst, 2, 1, CHAN_Y );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Z );
+ FETCH( func, *inst, 2, 1, CHAN_Z );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FETCH( func, *inst, 1, 1, CHAN_W );
+ emit_add( func, 0, 1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_COS:
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_cos( func, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DDX:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_DDY:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_KIL:
+ emit_kil( func, &inst->FullSrcRegisters[0] );
+ break;
+
+ case TGSI_OPCODE_PK2H:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_PK2US:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_PK4B:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_PK4UB:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_RFL:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_SEQ:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_SFL:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_SGT:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_SIN:
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_sin( func, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SLE:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_SNE:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_STR:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_TEX:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_TXD:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_UP2H:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_UP2US:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_UP4B:
+ assert( 0 );
break;
- case TGSI_OPCODE_MOV:
- /* TGSI_OPCODE_SWZ */
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_LIT:
- if (IS_CHANNEL_ENABLED(CHAN_X) || IS_CHANNEL_ENABLED(CHAN_W))
- {
- emit_tempf (func, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C);
- if (IS_CHANNEL_ENABLED(CHAN_X))
- STORE(0, 0, CHAN_X);
- if (IS_CHANNEL_ENABLED(CHAN_W))
- STORE(0, 0, CHAN_W);
- }
- if (IS_CHANNEL_ENABLED(CHAN_Y) || IS_CHANNEL_ENABLED(CHAN_Z))
- {
- if (IS_CHANNEL_ENABLED(CHAN_Y))
- {
- FETCH(0, 0, CHAN_X);
- sse_maxps (func,
- make_xmm (0),
- get_temp (TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C));
- STORE(0, 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(CHAN_Z))
- {
- FETCH(1, 0, CHAN_Y);
- sse_maxps (func,
- make_xmm (1),
- get_temp (TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C));
-
- FETCH(2, 0, CHAN_W);
- sse_minps (func,
- make_xmm (2),
- get_temp (TGSI_EXEC_TEMP_128_I,
- TGSI_EXEC_TEMP_128_C));
- sse_maxps (func,
- make_xmm (2),
- get_temp (TGSI_EXEC_TEMP_MINUS_128_I,
- TGSI_EXEC_TEMP_MINUS_128_C));
-
- emit_pow (func, 1, 2);
-
- FETCH(0, 0, CHAN_X);
- sse_xorps (func, make_xmm (2), make_xmm (2));
- sse_cmpps (func,
- make_xmm (2),
- make_xmm (0),
- cc_LessThanEqual);
- sse_andps (func,
- make_xmm (2),
- make_xmm (1));
-
- emit_store (func, 2, &inst->FullDstRegisters[0], inst, CHAN_Z);
- }
- }
- break;
-
- case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
- FETCH(0, 0, CHAN_X);
- emit_rcp (func, 0, 0);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
- FETCH(0, 0, CHAN_X);
- emit_rsqrt (func, 0, 0);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_EXP:
- assert (0);
- break;
-
- case TGSI_OPCODE_LOG:
- assert (0);
- break;
-
- case TGSI_OPCODE_MUL:
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- FETCH(1, 1, chan_index);
- emit_mul (func, 0, 1);
-
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_ADD:
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- FETCH(1, 1, chan_index);
- emit_add (func, 0, 1);
-
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_DP3:
- /* TGSI_OPCODE_DOT3 */
- FETCH(0, 0, CHAN_X);
- FETCH(1, 1, CHAN_X);
- emit_mul (func, 0, 1);
-
- FETCH(1, 0, CHAN_Y);
- FETCH(2, 1, CHAN_Y);
- emit_mul (func, 1, 2);
- emit_add (func, 0, 1);
-
- FETCH(1, 0, CHAN_Z);
- FETCH(2, 1, CHAN_Z);
- emit_mul (func, 1, 2);
- emit_add (func, 0, 1);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_DP4:
- /* TGSI_OPCODE_DOT4 */
- FETCH(0, 0, CHAN_X);
- FETCH(1, 1, CHAN_X);
- emit_mul (func, 0, 1);
-
- FETCH(1, 0, CHAN_Y);
- FETCH(2, 1, CHAN_Y);
- emit_mul (func, 1, 2);
- emit_add (func, 0, 1);
-
- FETCH(1, 0, CHAN_Z);
- FETCH(2, 1, CHAN_Z);
- emit_mul (func, 1, 2);
- emit_add (func, 0, 1);
-
- FETCH(1, 0, CHAN_W);
- FETCH(2, 1, CHAN_W);
- emit_mul (func, 1, 2);
- emit_add (func, 0, 1);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_DST:
- IF_IS_CHANNEL_ENABLED(CHAN_X)
- {
- emit_tempf (func,
- 0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C);
- STORE(0, 0, CHAN_X);
- }
- IF_IS_CHANNEL_ENABLED(CHAN_Y)
- {
- FETCH(0, 0, CHAN_Y);
- FETCH(1, 1, CHAN_Y);
- emit_mul (func, 0, 1);
- STORE(0, 0, CHAN_Y);
- }
- IF_IS_CHANNEL_ENABLED(CHAN_Z)
- {
- FETCH(0, 0, CHAN_Z);
- STORE(0, 0, CHAN_Z);
- }
- IF_IS_CHANNEL_ENABLED(CHAN_W)
- {
- FETCH(0, 1, CHAN_W);
- STORE(0, 0, CHAN_W);
- }
- break;
-
- case TGSI_OPCODE_MIN:
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- FETCH(1, 1, chan_index);
- sse_minps (func,
- make_xmm (0),
- make_xmm (1));
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_MAX:
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- FETCH(1, 1, chan_index);
- sse_maxps (func,
- make_xmm (0),
- make_xmm (1));
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
- emit_setcc (func, inst, cc_LessThan);
- break;
-
- case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
- emit_setcc (func, inst, cc_NotLessThan);
- break;
-
- case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- FETCH(1, 1, chan_index);
- FETCH(2, 2, chan_index);
- emit_mul (func, 0, 1);
- emit_add (func, 0, 2);
-
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_SUB:
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- FETCH(1, 1, chan_index);
- emit_sub (func, 0, 1);
-
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- FETCH(1, 1, chan_index);
- FETCH(2, 2, chan_index);
- emit_sub (func, 1, 2);
- emit_mul (func, 0, 1);
- emit_add (func, 0, 2);
-
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_CND:
- assert (0);
- break;
-
- case TGSI_OPCODE_CND0:
- assert (0);
- break;
-
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
- assert (0);
- break;
-
- case TGSI_OPCODE_INDEX:
- assert (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- assert (0);
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- emit_frc (func, 0);
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_CLAMP:
- assert (0);
- break;
-
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- emit_flr (func, 0);
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_ROUND:
- assert (0);
- break;
-
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
- FETCH(0, 0, CHAN_X);
- emit_ex2 (func, 0);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
- FETCH(0, 0, CHAN_X);
- emit_lg2 (func, 0);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
- FETCH(0, 0, CHAN_X);
- FETCH(1, 1, CHAN_X);
- emit_pow (func, 0, 1);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
- if (IS_CHANNEL_ENABLED(CHAN_X) || IS_CHANNEL_ENABLED(CHAN_Y))
- {
- FETCH(1, 1, CHAN_Z);
- FETCH(3, 0, CHAN_Z);
- }
- if (IS_CHANNEL_ENABLED(CHAN_X) || IS_CHANNEL_ENABLED(CHAN_Z))
- {
- FETCH(0, 0, CHAN_Y);
- FETCH(4, 1, CHAN_Y);
- }
- IF_IS_CHANNEL_ENABLED(CHAN_X)
- {
- emit_mov (func, 2, 0);
- emit_mul (func, 2, 1);
- emit_mov (func, 5, 3);
- emit_mul (func, 5, 4);
- emit_sub (func, 2, 5);
- STORE(2, 0, CHAN_X);
- }
-
- if (IS_CHANNEL_ENABLED(CHAN_Y) || IS_CHANNEL_ENABLED(CHAN_Z))
- {
- FETCH(2, 1, CHAN_X);
- FETCH(5, 0, CHAN_X);
- }
- IF_IS_CHANNEL_ENABLED(CHAN_Y)
- {
- emit_mul (func, 3, 2);
- emit_mul (func, 1, 5);
- emit_sub (func, 3, 1);
- STORE(3, 0, CHAN_Y);
- }
-
- IF_IS_CHANNEL_ENABLED(CHAN_Z)
- {
- emit_mul (func, 5, 4);
- emit_mul (func, 0, 2);
- emit_sub (func, 5, 0);
- STORE(5, 0, CHAN_Z);
- }
-
- IF_IS_CHANNEL_ENABLED(CHAN_W)
- {
- FETCH(0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C);
- STORE(0, 0, CHAN_W);
- }
- break;
-
- case TGSI_OPCODE_MULTIPLYMATRIX:
- assert (0);
- break;
-
- case TGSI_OPCODE_ABS:
- FOR_EACH_ENABLED_CHANNEL
- {
- FETCH(0, 0, chan_index);
- emit_abs (func, 0);
-
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_RCC:
- assert (0);
- break;
-
- case TGSI_OPCODE_DPH:
- FETCH(0, 0, CHAN_X);
- FETCH(1, 1, CHAN_X);
- emit_mul (func, 0, 1);
-
- FETCH(1, 0, CHAN_Y);
- FETCH(2, 1, CHAN_Y);
- emit_mul (func, 1, 2);
- emit_add (func, 0, 1);
-
- FETCH(1, 0, CHAN_Z);
- FETCH(2, 1, CHAN_Z);
- emit_mul (func, 1, 2);
- emit_add (func, 0, 1);
-
- FETCH(1, 1, CHAN_W);
- emit_add (func, 0, 1);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_COS:
- FETCH(0, 0, CHAN_X);
- emit_cos (func, 0);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_DDX:
- assert (0);
- break;
-
- case TGSI_OPCODE_DDY:
- assert (0);
- break;
-
- case TGSI_OPCODE_KIL:
- emit_kil (func, &inst->FullSrcRegisters[0]);
- break;
-
- case TGSI_OPCODE_PK2H:
- assert (0);
- break;
-
- case TGSI_OPCODE_PK2US:
- assert (0);
- break;
-
- case TGSI_OPCODE_PK4B:
- assert (0);
- break;
-
- case TGSI_OPCODE_PK4UB:
- assert (0);
- break;
-
- case TGSI_OPCODE_RFL:
- assert (0);
- break;
-
- case TGSI_OPCODE_SEQ:
- assert (0);
- break;
-
- case TGSI_OPCODE_SFL:
- assert (0);
- break;
-
- case TGSI_OPCODE_SGT:
- assert (0);
- break;
-
- case TGSI_OPCODE_SIN:
- FETCH(0, 0, CHAN_X);
- emit_sin (func, 0);
-
- FOR_EACH_ENABLED_CHANNEL
- {
- STORE(0, 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_SLE:
- assert (0);
- break;
-
- case TGSI_OPCODE_SNE:
- assert (0);
- break;
-
- case TGSI_OPCODE_STR:
- assert (0);
- break;
-
- case TGSI_OPCODE_TEX:
- assert (0);
- break;
-
- case TGSI_OPCODE_TXD:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP2H:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP2US:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP4B:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP4UB:
- assert (0);
- break;
-
- case TGSI_OPCODE_X2D:
- assert (0);
- break;
-
- case TGSI_OPCODE_ARA:
- assert (0);
- break;
-
- case TGSI_OPCODE_ARR:
- assert (0);
- break;
-
- case TGSI_OPCODE_BRA:
- assert (0);
- break;
-
- case TGSI_OPCODE_CAL:
- assert (0);
- break;
-
- case TGSI_OPCODE_RET:
- assert (0);
- break;
-
- case TGSI_OPCODE_SSG:
- assert (0);
- break;
-
- case TGSI_OPCODE_CMP:
- emit_cmp (func, inst);
- break;
-
- case TGSI_OPCODE_SCS:
- IF_IS_CHANNEL_ENABLED(CHAN_X)
- {
- FETCH(0, 0, CHAN_X);
- emit_cos (func, 0);
- STORE(0, 0, CHAN_X);
- }
-
- IF_IS_CHANNEL_ENABLED(CHAN_Y)
- {
- FETCH(0, 0, CHAN_Y);
- emit_sin (func, 0);
- STORE(0, 0, CHAN_Y);
- }
-
- IF_IS_CHANNEL_ENABLED(CHAN_Z)
- {
- FETCH(0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C);
- STORE(0, 0, CHAN_Z);
- }
-
- IF_IS_CHANNEL_ENABLED(CHAN_W)
- {
- FETCH(0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C);
- STORE(0, 0, CHAN_W);
- }
- break;
-
- case TGSI_OPCODE_TXB:
- assert (0);
- break;
-
- case TGSI_OPCODE_NRM:
- assert (0);
- break;
-
- case TGSI_OPCODE_DIV:
- assert (0);
- break;
-
- case TGSI_OPCODE_DP2:
- assert (0);
- break;
-
- case TGSI_OPCODE_TXL:
- assert (0);
- break;
-
- case TGSI_OPCODE_BRK:
- assert (0);
- break;
-
- case TGSI_OPCODE_IF:
- assert (0);
- break;
-
- case TGSI_OPCODE_LOOP:
- assert (0);
- break;
-
- case TGSI_OPCODE_REP:
- assert (0);
- break;
-
- case TGSI_OPCODE_ELSE:
- assert (0);
- break;
-
- case TGSI_OPCODE_ENDIF:
- assert (0);
- break;
-
- case TGSI_OPCODE_ENDLOOP:
- assert (0);
- break;
-
- case TGSI_OPCODE_ENDREP:
- assert (0);
- break;
-
- case TGSI_OPCODE_PUSHA:
- assert (0);
- break;
-
- case TGSI_OPCODE_POPA:
- assert (0);
- break;
-
- case TGSI_OPCODE_CEIL:
- assert (0);
- break;
-
- case TGSI_OPCODE_I2F:
- assert (0);
- break;
-
- case TGSI_OPCODE_NOT:
- assert (0);
- break;
-
- case TGSI_OPCODE_TRUNC:
- assert (0);
- break;
-
- case TGSI_OPCODE_SHL:
- assert (0);
- break;
-
- case TGSI_OPCODE_SHR:
- assert (0);
- break;
-
- case TGSI_OPCODE_AND:
- assert (0);
- break;
-
- case TGSI_OPCODE_OR:
- assert (0);
- break;
-
- case TGSI_OPCODE_MOD:
- assert (0);
- break;
-
- case TGSI_OPCODE_XOR:
- assert (0);
- break;
+ case TGSI_OPCODE_UP4UB:
+ assert( 0 );
+ break;
- case TGSI_OPCODE_SAD:
- assert (0);
- break;
+ case TGSI_OPCODE_X2D:
+ assert( 0 );
+ break;
- case TGSI_OPCODE_TXF:
- assert (0);
- break;
+ case TGSI_OPCODE_ARA:
+ assert( 0 );
+ break;
- case TGSI_OPCODE_TXQ:
- assert (0);
- break;
+ case TGSI_OPCODE_ARR:
+ assert( 0 );
+ break;
- case TGSI_OPCODE_CONT:
- assert (0);
- break;
+ case TGSI_OPCODE_BRA:
+ assert( 0 );
+ break;
- case TGSI_OPCODE_EMIT:
- assert (0);
- break;
+ case TGSI_OPCODE_CAL:
+ assert( 0 );
+ break;
- case TGSI_OPCODE_ENDPRIM:
- assert (0);
- break;
+ case TGSI_OPCODE_RET:
+ assert( 0 );
+ break;
- default:
- assert (0);
- }
+ case TGSI_OPCODE_SSG:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_CMP:
+ emit_cmp (func, inst);
+ break;
+
+ case TGSI_OPCODE_SCS: /* Sine/cosine of src0.x:
+   *   dst.x = cos(src0.x), dst.y = sin(src0.x),
+   *   dst.z = constant from the 00000000 temp slot,
+   *   dst.w = constant from the ONE temp slot. */
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_cos( func, 0 );
+ STORE( func, *inst, 0, 0, CHAN_X );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+ FETCH( func, *inst, 0, 0, CHAN_X ); /* the sine is taken of src0.x as well, not src0.y */
+ emit_sin( func, 0 );
+ STORE( func, *inst, 0, 0, CHAN_Y );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+ emit_tempf( func, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ); /* constant temp load; FETCH takes a source-operand index, not a temp index */
+ STORE( func, *inst, 0, 0, CHAN_Z );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+ emit_tempf( func, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C ); /* constant temp load, same reason as for Z */
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_TXB:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_NRM:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_DIV:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_DP2:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_TXL:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_BRK:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_IF:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_LOOP:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_REP:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_ELSE:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_ENDIF:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_ENDLOOP:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_ENDREP:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_PUSHA:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_POPA:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_CEIL:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_I2F:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_NOT:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_TRUNC:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_SHL:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_SHR:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_AND:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_OR:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_MOD:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_XOR:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_SAD:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_TXF:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_TXQ:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_CONT:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_EMIT:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_ENDPRIM:
+ assert( 0 );
+ break;
+
+ default:
+ assert( 0 );
+ }
}
-GLboolean
-tgsi_emit_sse (struct tgsi_token *tokens,
- struct x86_function *function)
+unsigned
+tgsi_emit_sse( /* Walk the TGSI token stream in tokens and emit the equivalent
+   * x86/SSE code into func. Declarations are skipped, instructions go
+   * through emit_instruction(), any other token type trips assert( 0 ).
+   * Always returns 1. */
+ struct tgsi_token *tokens,
+ struct x86_function *func )
{
- struct tgsi_parse_context parse;
+ struct tgsi_parse_context parse;
- x86_init_func (function);
+ x86_init_func( func );
- x86_mov (function, get_input_base (), get_argument (0));
- x86_mov (function, get_output_base (), get_argument (1));
- x86_mov (function, get_const_base (), get_argument (2));
- x86_mov (function, get_temp_base (), get_argument (3));
+ x86_mov( /* prologue: argument 0 -> input base register */
+ func,
+ get_input_base(),
+ get_argument( 0 ) );
+ x86_mov( /* argument 1 -> output base register */
+ func,
+ get_output_base(),
+ get_argument( 1 ) );
+ x86_mov( /* argument 2 -> constant base register */
+ func,
+ get_const_base(),
+ get_argument( 2 ) );
+ x86_mov( /* argument 3 -> temporary base register */
+ func,
+ get_temp_base(),
+ get_argument( 3 ) );
- tgsi_parse_init (&parse, tokens);
+ tgsi_parse_init( &parse, tokens );
- while (!tgsi_parse_end_of_tokens (&parse))
- {
- tgsi_parse_token (&parse);
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
- switch (parse.FullToken.Token.Type)
- {
- case TGSI_TOKEN_TYPE_DECLARATION:
- break;
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION: /* declarations generate no code */
+ break;
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- emit_instruction (function, &parse.FullToken.FullInstruction);
- break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ emit_instruction(
+ func,
+ &parse.FullToken.FullInstruction );
+ break;
- default:
- assert (0);
- }
- }
+ default:
+ assert( 0 );
+ }
+ }
- tgsi_parse_free (&parse);
+ tgsi_parse_free( &parse );
#ifdef WIN32
- x86_retw (function, 16);
+ x86_retw( func, 16 ); /* presumably pops the four 4-byte arguments on return -- confirm calling convention */
#else
- x86_ret (function);
+ x86_ret( func );
#endif
- return GL_FALSE;
+ return 1; /* unconditional; no failure path is reported to the caller */
}
#endif
diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c
index 3ea37bb5e7..f5b0ccdb9b 100644
--- a/src/mesa/x86/rtasm/x86sse.c
+++ b/src/mesa/x86/rtasm/x86sse.c
@@ -502,6 +502,14 @@ void sse_addss( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse_andnps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* Append an ANDNPS dst, src instruction to p (bitwise dst = ~dst & src). */
+ emit_2ub(p, X86_TWOB, 0x55); /* opcode byte 0x55 after the X86_TWOB escape (presumably 0x0F) */
+ emit_modrm( p, dst, src ); /* operand encoding for dst and src */
+}
+
void sse_andps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -510,6 +518,13 @@ void sse_andps( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse_rsqrtps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* Append an RSQRTPS dst, src instruction to p (packed approximate reciprocal square root). */
+ emit_2ub(p, X86_TWOB, 0x52); /* opcode byte 0x52 after the X86_TWOB escape (presumably 0x0F) */
+ emit_modrm( p, dst, src ); /* operand encoding for dst and src */
+}
void sse_rsqrtss( struct x86_function *p,
struct x86_reg dst,
@@ -538,6 +553,21 @@ void sse_movlhps( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse_orps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* Append an ORPS dst, src instruction to p (bitwise dst |= src). */
+ emit_2ub(p, X86_TWOB, 0x56); /* opcode byte 0x56 after the X86_TWOB escape (presumably 0x0F) */
+ emit_modrm( p, dst, src ); /* operand encoding for dst and src */
+}
+
+void sse_xorps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* Append an XORPS dst, src instruction to p (bitwise dst ^= src; xorps r, r zeroes r). */
+ emit_2ub(p, X86_TWOB, 0x57); /* opcode byte 0x57 after the X86_TWOB escape (presumably 0x0F) */
+ emit_modrm( p, dst, src ); /* operand encoding for dst and src */
+}
void sse_cvtps2pi( struct x86_function *p,
struct x86_reg dst,
@@ -576,6 +606,14 @@ void sse_cmpps( struct x86_function *p,
emit_1ub(p, cc);
}
+void sse_pmovmskb( struct x86_function *p,
+ struct x86_reg dest,
+ struct x86_reg src)
+{ /* Append a PMOVMSKB dest, src instruction to p (gathers the top bit of each source byte).
+   * NOTE(review): the 0x66 prefix selects the XMM-source encoding, which is an
+   * SSE2 instruction despite the sse_ name; dest is presumably a general-purpose
+   * register -- confirm at call sites. */
+ emit_3ub(p, 0x66, X86_TWOB, 0xD7); /* 0x66 prefix, X86_TWOB escape, opcode 0xD7 */
+ emit_modrm(p, dest, src); /* operand encoding for dest and src */
+}
+
/***********************************************************************
* SSE2 instructions
*/
@@ -593,6 +631,14 @@ void sse2_pshufd( struct x86_function *p,
emit_1ub(p, shuf);
}
+void sse2_cvttps2dq( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* Append a CVTTPS2DQ dst, src instruction to p (packed float -> int32 with truncation). */
+ emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); /* 0xF3 prefix distinguishes the truncating form; opcode 0x5B */
+ emit_modrm( p, dst, src ); /* operand encoding for dst and src */
+}
+
void sse2_cvtps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -625,6 +671,14 @@ void sse2_packuswb( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_rcpps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* Append an RCPPS dst, src instruction to p (packed approximate reciprocal).
+   * NOTE(review): RCPPS is an SSE1 instruction; the sse2_ prefix appears to
+   * mirror the neighbouring sse2_rcpss name rather than the required ISA level. */
+ emit_2ub(p, X86_TWOB, 0x53); /* opcode byte 0x53 after the X86_TWOB escape (presumably 0x0F) */
+ emit_modrm( p, dst, src ); /* operand encoding for dst and src */
+}
+
void sse2_rcpss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
diff --git a/src/mesa/x86/rtasm/x86sse.h b/src/mesa/x86/rtasm/x86sse.h
index 66fb852ac9..c6236395b2 100644
--- a/src/mesa/x86/rtasm/x86sse.h
+++ b/src/mesa/x86/rtasm/x86sse.h
@@ -142,17 +142,20 @@ void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg sr
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
+void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, GLubyte cc );
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -167,9 +170,13 @@ void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
+void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );