summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/rtasm
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/rtasm')
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_cpu.c23
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c339
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h4
3 files changed, 327 insertions, 39 deletions
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index eb3359750b..f01e12faa0 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -26,14 +26,29 @@
**************************************************************************/
+#include "pipe/p_debug.h"
#include "rtasm_cpu.h"
+static boolean rtasm_sse_enabled(void)
+{
+ static boolean firsttime = 1;
+ static boolean enabled;
+
+ /* This gets called quite often at the moment:
+ */
+ if (firsttime) {
+ enabled = !debug_get_bool_option("GALLIUM_NOSSE", FALSE);
+ firsttime = FALSE;
+ }
+ return enabled;
+}
+
int rtasm_cpu_has_sse(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__)
- return 1;
+#if defined(__i386__) || defined(__386__) || defined(i386)
+ return rtasm_sse_enabled();
#else
return 0;
#endif
@@ -42,8 +57,8 @@ int rtasm_cpu_has_sse(void)
int rtasm_cpu_has_sse2(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__)
- return 1;
+#if defined(__i386__) || defined(__386__) || defined(i386)
+ return rtasm_sse_enabled();
#else
return 0;
#endif
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index aea8b28e58..3cd45d7dd9 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -33,15 +33,114 @@
#define DISASSEM 0
#define X86_TWOB 0x0f
-static unsigned char *cptr( void (*label)() )
-{
- return (unsigned char *) label;
-}
+
+#define DUMP_SSE 0
+
+#if DUMP_SSE
+
+static void
+_print_reg(
+ struct x86_reg reg )
+{
+ if (reg.mod != mod_REG)
+ debug_printf( "[" );
+
+ switch( reg.file ) {
+ case file_REG32:
+ switch( reg.idx ) {
+ case reg_AX: debug_printf( "EAX" ); break;
+ case reg_CX: debug_printf( "ECX" ); break;
+ case reg_DX: debug_printf( "EDX" ); break;
+ case reg_BX: debug_printf( "EBX" ); break;
+ case reg_SP: debug_printf( "ESP" ); break;
+ case reg_BP: debug_printf( "EBP" ); break;
+ case reg_SI: debug_printf( "ESI" ); break;
+ case reg_DI: debug_printf( "EDI" ); break;
+ }
+ break;
+ case file_MMX:
+ debug_printf( "MMX%u", reg.idx );
+ break;
+ case file_XMM:
+ debug_printf( "XMM%u", reg.idx );
+ break;
+ case file_x87:
+ debug_printf( "fp%u", reg.idx );
+ break;
+ }
+
+ if (reg.mod == mod_DISP8 ||
+ reg.mod == mod_DISP32)
+ debug_printf("+%d", reg.disp);
+
+ if (reg.mod != mod_REG)
+ debug_printf( "]" );
+}
+
+
+#define DUMP_START() debug_printf( "\n" )
+#define DUMP_END() debug_printf( "\n" )
+
+#define DUMP() do { \
+ const char *foo = __FUNCTION__; \
+ while (*foo && *foo != '_') \
+ foo++; \
+ if (*foo) \
+ foo++; \
+ debug_printf( "\n% 15s ", foo ); \
+} while (0)
+
+#define DUMP_I( I ) do { \
+ DUMP(); \
+ debug_printf( "%u", I ); \
+} while( 0 )
+
+#define DUMP_R( R0 ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+} while( 0 )
+
+#define DUMP_RR( R0, R1 ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+ debug_printf( ", " ); \
+ _print_reg( R1 ); \
+} while( 0 )
+
+#define DUMP_RI( R0, I ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+ debug_printf( ", %u", I ); \
+} while( 0 )
+
+#define DUMP_RRI( R0, R1, I ) do { \
+ DUMP(); \
+ _print_reg( R0 ); \
+ debug_printf( ", " ); \
+ _print_reg( R1 ); \
+ debug_printf( ", %u", I ); \
+} while( 0 )
+
+#else
+
+#define DUMP_START()
+#define DUMP_END()
+#define DUMP( )
+#define DUMP_I( I )
+#define DUMP_R( R0 )
+#define DUMP_RR( R0, R1 )
+#define DUMP_RI( R0, I )
+#define DUMP_RRI( R0, R1, I )
+
+#endif
static void do_realloc( struct x86_function *p )
{
- if (p->size == 0) {
+ if (p->store == p->error_overflow) {
+ p->csr = p->store;
+ }
+ else if (p->size == 0) {
p->size = 1024;
p->store = rtasm_exec_malloc(p->size);
p->csr = p->store;
@@ -51,10 +150,22 @@ static void do_realloc( struct x86_function *p )
unsigned char *tmp = p->store;
p->size *= 2;
p->store = rtasm_exec_malloc(p->size);
- memcpy(p->store, tmp, used);
- p->csr = p->store + used;
+
+ if (p->store) {
+ memcpy(p->store, tmp, used);
+ p->csr = p->store + used;
+ }
+ else {
+ p->csr = p->store;
+ }
+
rtasm_exec_free(tmp);
}
+
+ if (p->store == NULL) {
+ p->store = p->csr = p->error_overflow;
+ p->size = sizeof(p->error_overflow);
+ }
}
/* Emit bytes to the instruction stream:
@@ -253,6 +364,7 @@ void x86_jcc( struct x86_function *p,
unsigned char *label )
{
intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2);
+ DUMP_I(cc);
if (offset <= 127 && offset >= -128) {
emit_1ub(p, 0x70 + cc);
@@ -270,6 +382,7 @@ void x86_jcc( struct x86_function *p,
unsigned char *x86_jcc_forward( struct x86_function *p,
enum x86_cc cc )
{
+ DUMP_I(cc);
emit_2ub(p, 0x0f, 0x80 + cc);
emit_1i(p, 0);
return x86_get_label(p);
@@ -277,6 +390,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p,
unsigned char *x86_jmp_forward( struct x86_function *p)
{
+ DUMP();
emit_1ub(p, 0xe9);
emit_1i(p, 0);
return x86_get_label(p);
@@ -284,6 +398,8 @@ unsigned char *x86_jmp_forward( struct x86_function *p)
unsigned char *x86_call_forward( struct x86_function *p)
{
+ DUMP();
+
emit_1ub(p, 0xe8);
emit_1i(p, 0);
return x86_get_label(p);
@@ -299,23 +415,31 @@ void x86_fixup_fwd_jump( struct x86_function *p,
void x86_jmp( struct x86_function *p, unsigned char *label)
{
+ DUMP_I( label );
emit_1ub(p, 0xe9);
emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4);
}
#if 0
+static unsigned char *cptr( void (*label)() )
+{
+ return (unsigned char *) label;
+}
+
/* This doesn't work once we start reallocating & copying the
* generated code on buffer fills, because the call is relative to the
* current pc.
*/
void x86_call( struct x86_function *p, void (*label)())
{
+ DUMP_I( label );
emit_1ub(p, 0xe8);
emit_1i(p, cptr(label) - x86_get_label(p) - 4);
}
#else
void x86_call( struct x86_function *p, struct x86_reg reg)
{
+ DUMP_R( reg );
emit_1ub(p, 0xff);
emit_modrm_noreg(p, 2, reg);
}
@@ -328,6 +452,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg)
*/
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
+ DUMP_RI( dst, imm );
assert(dst.mod == mod_REG);
emit_1ub(p, 0xb8 + dst.idx);
emit_1i(p, imm);
@@ -336,6 +461,7 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
void x86_push( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x50 + reg.idx);
p->stack_offset += 4;
@@ -344,6 +470,7 @@ void x86_push( struct x86_function *p,
void x86_pop( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x58 + reg.idx);
p->stack_offset -= 4;
@@ -352,6 +479,7 @@ void x86_pop( struct x86_function *p,
void x86_inc( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x40 + reg.idx);
}
@@ -359,17 +487,20 @@ void x86_inc( struct x86_function *p,
void x86_dec( struct x86_function *p,
struct x86_reg reg )
{
+ DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x48 + reg.idx);
}
void x86_ret( struct x86_function *p )
{
+ DUMP();
emit_1ub(p, 0xc3);
}
void x86_sahf( struct x86_function *p )
{
+ DUMP();
emit_1ub(p, 0x9e);
}
@@ -377,6 +508,7 @@ void x86_mov( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
@@ -384,6 +516,7 @@ void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x33, 0x31, dst, src );
}
@@ -391,6 +524,7 @@ void x86_cmp( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x3b, 0x39, dst, src );
}
@@ -398,6 +532,7 @@ void x86_lea( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, 0x8d);
emit_modrm( p, dst, src );
}
@@ -406,6 +541,7 @@ void x86_test( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, 0x85);
emit_modrm( p, dst, src );
}
@@ -414,20 +550,36 @@ void x86_add( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm(p, 0x03, 0x01, dst, src );
}
+/* Calculate EAX * src, results in EDX:EAX.
+ */
void x86_mul( struct x86_function *p,
struct x86_reg src )
{
- assert (src.file == file_REG32 && src.mod == mod_REG);
- emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
+ DUMP_R( src );
+ emit_1ub(p, 0xf7);
+ emit_modrm_noreg(p, 4, src );
+}
+
+
+void x86_imul( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, X86_TWOB, 0xAF);
+ emit_modrm(p, dst, src);
}
+
void x86_sub( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm(p, 0x2b, 0x29, dst, src );
}
@@ -435,6 +587,7 @@ void x86_or( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x0b, 0x09, dst, src );
}
@@ -442,6 +595,7 @@ void x86_and( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_op_modrm( p, 0x23, 0x21, dst, src );
}
@@ -456,6 +610,7 @@ void sse_movss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, 0xF3, X86_TWOB);
emit_op_modrm( p, 0x10, 0x11, dst, src );
}
@@ -464,6 +619,7 @@ void sse_movaps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x28, 0x29, dst, src );
}
@@ -472,6 +628,7 @@ void sse_movups( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x10, 0x11, dst, src );
}
@@ -480,6 +637,7 @@ void sse_movhps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod != mod_REG || src.mod != mod_REG);
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
@@ -489,6 +647,7 @@ void sse_movlps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod != mod_REG || src.mod != mod_REG);
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
@@ -498,6 +657,7 @@ void sse_maxps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5F);
emit_modrm( p, dst, src );
}
@@ -506,6 +666,7 @@ void sse_maxss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
emit_modrm( p, dst, src );
}
@@ -514,6 +675,7 @@ void sse_divss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
emit_modrm( p, dst, src );
}
@@ -522,6 +684,7 @@ void sse_minps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5D);
emit_modrm( p, dst, src );
}
@@ -530,6 +693,7 @@ void sse_subps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5C);
emit_modrm( p, dst, src );
}
@@ -538,6 +702,7 @@ void sse_mulps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x59);
emit_modrm( p, dst, src );
}
@@ -546,6 +711,7 @@ void sse_mulss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x59);
emit_modrm( p, dst, src );
}
@@ -554,6 +720,7 @@ void sse_addps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x58);
emit_modrm( p, dst, src );
}
@@ -562,6 +729,7 @@ void sse_addss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x58);
emit_modrm( p, dst, src );
}
@@ -570,6 +738,7 @@ void sse_andnps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x55);
emit_modrm( p, dst, src );
}
@@ -578,6 +747,7 @@ void sse_andps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x54);
emit_modrm( p, dst, src );
}
@@ -586,6 +756,7 @@ void sse_rsqrtps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x52);
emit_modrm( p, dst, src );
}
@@ -594,6 +765,7 @@ void sse_rsqrtss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x52);
emit_modrm( p, dst, src );
@@ -603,6 +775,7 @@ void sse_movhlps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod == mod_REG && src.mod == mod_REG);
emit_2ub(p, X86_TWOB, 0x12);
emit_modrm( p, dst, src );
@@ -612,6 +785,7 @@ void sse_movlhps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.mod == mod_REG && src.mod == mod_REG);
emit_2ub(p, X86_TWOB, 0x16);
emit_modrm( p, dst, src );
@@ -621,6 +795,7 @@ void sse_orps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x56);
emit_modrm( p, dst, src );
}
@@ -629,6 +804,7 @@ void sse_xorps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x57);
emit_modrm( p, dst, src );
}
@@ -637,6 +813,7 @@ void sse_cvtps2pi( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_XMM || src.mod != mod_REG));
@@ -646,36 +823,48 @@ void sse_cvtps2pi( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_cvtdq2ps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, X86_TWOB, 0x5b);
+ emit_modrm( p, dst, src );
+}
+
/* Shufps can also be used to implement a reduced swizzle when dest ==
* arg0.
*/
void sse_shufps( struct x86_function *p,
- struct x86_reg dest,
- struct x86_reg arg0,
+ struct x86_reg dst,
+ struct x86_reg src,
unsigned char shuf)
{
+ DUMP_RRI( dst, src, shuf );
emit_2ub(p, X86_TWOB, 0xC6);
- emit_modrm(p, dest, arg0);
+ emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
void sse_cmpps( struct x86_function *p,
- struct x86_reg dest,
- struct x86_reg arg0,
+ struct x86_reg dst,
+ struct x86_reg src,
unsigned char cc)
{
+ DUMP_RRI( dst, src, cc );
emit_2ub(p, X86_TWOB, 0xC2);
- emit_modrm(p, dest, arg0);
+ emit_modrm(p, dst, src);
emit_1ub(p, cc);
}
void sse_pmovmskb( struct x86_function *p,
- struct x86_reg dest,
+ struct x86_reg dst,
struct x86_reg src)
{
- emit_3ub(p, 0x66, X86_TWOB, 0xD7);
- emit_modrm(p, dest, src);
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, X86_TWOB, 0xD7);
+ emit_modrm(p, dst, src);
}
/***********************************************************************
@@ -686,12 +875,13 @@ void sse_pmovmskb( struct x86_function *p,
* Perform a reduced swizzle:
*/
void sse2_pshufd( struct x86_function *p,
- struct x86_reg dest,
- struct x86_reg arg0,
+ struct x86_reg dst,
+ struct x86_reg src,
unsigned char shuf)
{
+ DUMP_RRI( dst, src, shuf );
emit_3ub(p, 0x66, X86_TWOB, 0x70);
- emit_modrm(p, dest, arg0);
+ emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
@@ -699,6 +889,7 @@ void sse2_cvttps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
emit_modrm( p, dst, src );
}
@@ -707,6 +898,7 @@ void sse2_cvtps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x5B);
emit_modrm( p, dst, src );
}
@@ -715,6 +907,7 @@ void sse2_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x6B);
emit_modrm( p, dst, src );
}
@@ -723,6 +916,7 @@ void sse2_packsswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x63);
emit_modrm( p, dst, src );
}
@@ -731,14 +925,26 @@ void sse2_packuswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x67);
emit_modrm( p, dst, src );
}
+void sse2_punpcklbw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, X86_TWOB, 0x60);
+ emit_modrm( p, dst, src );
+}
+
+
void sse2_rcpps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x53);
emit_modrm( p, dst, src );
}
@@ -747,6 +953,7 @@ void sse2_rcpss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x53);
emit_modrm( p, dst, src );
}
@@ -755,6 +962,7 @@ void sse2_movd( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
emit_2ub(p, 0x66, X86_TWOB);
emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}
@@ -767,30 +975,35 @@ void sse2_movd( struct x86_function *p,
*/
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 2, dst);
}
void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 3, dst);
}
void x87_fild( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
emit_1ub(p, 0xdf);
emit_modrm_noreg(p, 0, arg);
}
void x87_fldz( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xee);
}
void x87_fldcw( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_REG32);
assert(arg.mod != mod_REG);
emit_1ub(p, 0xd9);
@@ -799,26 +1012,31 @@ void x87_fldcw( struct x86_function *p, struct x86_reg arg )
void x87_fld1( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xe8);
}
void x87_fldl2e( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xea);
}
void x87_fldln2( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xed);
}
void x87_fwait( struct x86_function *p )
{
+ DUMP();
emit_1ub(p, 0x9b);
}
void x87_fnclex( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xdb, 0xe2);
}
@@ -855,49 +1073,55 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86
assert(0);
}
-void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xc8,
0xdc, 0xc8,
4);
}
-void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xe0,
0xdc, 0xe8,
4);
}
-void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xe8,
0xdc, 0xe0,
5);
}
-void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xc0,
0xdc, 0xc0,
0);
}
-void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xf0,
0xdc, 0xf8,
6);
}
-void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
- x87_arith_op(p, dst, arg,
+ DUMP_RR( dst, src );
+ x87_arith_op(p, dst, src,
0xd8, 0xf8,
0xdc, 0xf0,
7);
@@ -905,6 +1129,7 @@ void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
void x87_fmulp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc8+dst.idx);
@@ -912,6 +1137,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst )
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe8+dst.idx);
@@ -919,6 +1145,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst )
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe0+dst.idx);
@@ -926,6 +1153,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
void x87_faddp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc0+dst.idx);
@@ -933,6 +1161,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst )
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf8+dst.idx);
@@ -940,6 +1169,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst )
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf0+dst.idx);
@@ -947,70 +1177,83 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
void x87_fucom( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe0+arg.idx);
}
void x87_fucomp( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe8+arg.idx);
}
void x87_fucompp( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xda, 0xe9);
}
void x87_fxch( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xd9, 0xc8+arg.idx);
}
void x87_fabs( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xe1);
}
void x87_fchs( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xe0);
}
void x87_fcos( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xff);
}
void x87_fprndint( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfc);
}
void x87_fscale( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfd);
}
void x87_fsin( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfe);
}
void x87_fsincos( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfb);
}
void x87_fsqrt( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xfa);
}
void x87_fxtract( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf4);
}
@@ -1020,6 +1263,7 @@ void x87_fxtract( struct x86_function *p )
*/
void x87_f2xm1( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf0);
}
@@ -1028,6 +1272,7 @@ void x87_f2xm1( struct x86_function *p )
*/
void x87_fyl2x( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf1);
}
@@ -1038,12 +1283,14 @@ void x87_fyl2x( struct x86_function *p )
*/
void x87_fyl2xp1( struct x86_function *p )
{
+ DUMP();
emit_2ub(p, 0xd9, 0xf9);
}
void x87_fld( struct x86_function *p, struct x86_reg arg )
{
+ DUMP_R( arg );
if (arg.file == file_x87)
emit_2ub(p, 0xd9, 0xc0 + arg.idx);
else {
@@ -1054,6 +1301,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg )
void x87_fst( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xdd, 0xd0 + dst.idx);
else {
@@ -1064,6 +1312,7 @@ void x87_fst( struct x86_function *p, struct x86_reg dst )
void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xdd, 0xd8 + dst.idx);
else {
@@ -1074,6 +1323,7 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst )
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xd8, 0xd0 + dst.idx);
else {
@@ -1084,6 +1334,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst )
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xd8, 0xd8 + dst.idx);
else {
@@ -1095,6 +1346,7 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst )
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
+ DUMP_R( dst );
assert(dst.file == file_REG32);
if (dst.idx == reg_AX &&
@@ -1115,6 +1367,7 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
void mmx_emms( struct x86_function *p )
{
+ DUMP();
assert(p->need_emms);
emit_2ub(p, 0x0f, 0x77);
p->need_emms = 0;
@@ -1124,6 +1377,7 @@ void mmx_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_MMX || src.mod != mod_REG));
@@ -1137,6 +1391,7 @@ void mmx_packuswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_MMX || src.mod != mod_REG));
@@ -1150,6 +1405,7 @@ void mmx_movd( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
p->need_emms = 1;
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x6e, 0x7e, dst, src );
@@ -1159,6 +1415,7 @@ void mmx_movq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
+ DUMP_RR( dst, src );
p->need_emms = 1;
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x6f, 0x7f, dst, src );
@@ -1186,18 +1443,25 @@ void x86_init_func( struct x86_function *p )
p->size = 0;
p->store = NULL;
p->csr = p->store;
+ DUMP_START();
}
void x86_init_func_size( struct x86_function *p, unsigned code_size )
{
p->size = code_size;
p->store = rtasm_exec_malloc(code_size);
+ if (p->store == NULL) {
+ p->store = p->error_overflow;
+ }
p->csr = p->store;
+ DUMP_START();
}
void x86_release_func( struct x86_function *p )
{
- rtasm_exec_free(p->store);
+ if (p->store && p->store != p->error_overflow)
+ rtasm_exec_free(p->store);
+
p->store = NULL;
p->csr = NULL;
p->size = 0;
@@ -1206,9 +1470,14 @@ void x86_release_func( struct x86_function *p )
void (*x86_get_func( struct x86_function *p ))(void)
{
+ DUMP_END();
if (DISASSEM && p->store)
debug_printf("disassemble %p %p\n", p->store, p->csr);
- return (void (*)(void)) p->store;
+
+ if (p->store == p->error_overflow)
+ return (void (*)(void)) NULL;
+ else
+ return (void (*)(void)) p->store;
}
#else
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 606b41eb35..695a1cef4e 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -43,6 +43,7 @@ struct x86_function {
unsigned char *csr;
unsigned stack_offset;
int need_emms;
+ unsigned char error_overflow[4];
const char *fn;
};
@@ -165,6 +166,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg sr
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -202,6 +204,7 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
+void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -211,6 +214,7 @@ void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mul( struct x86_function *p, struct x86_reg src );
+void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );