From 39ea0308425ad04618061129c63c22ac0efb0692 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 12:00:48 +0900 Subject: Rename rtasm files. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 1196 ++++++++++++++++++++++++++++ 1 file changed, 1196 insertions(+) create mode 100644 src/gallium/auxiliary/rtasm/rtasm_x86sse.c (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c new file mode 100644 index 0000000000..3c885a9fff --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -0,0 +1,1196 @@ +#if defined(__i386__) || defined(__386__) + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" + +#include "rtasm_execmem.h" +#include "rtasm_x86sse.h" + +#define DISASSEM 0 +#define X86_TWOB 0x0f + +static unsigned char *cptr( void (*label)() ) +{ + return (unsigned char *)(unsigned long)label; +} + + +static void do_realloc( struct x86_function *p ) +{ + if (p->size == 0) { + p->size = 1024; + p->store = rtasm_exec_malloc(p->size); + p->csr = p->store; + } + else { + unsigned used = p->csr - p->store; + unsigned char *tmp = p->store; + p->size *= 2; + p->store = rtasm_exec_malloc(p->size); + memcpy(p->store, tmp, used); + p->csr = p->store + used; + rtasm_exec_free(tmp); + } +} + +/* Emit bytes to the instruction stream: + */ +static unsigned char *reserve( struct x86_function *p, int bytes ) +{ + if (p->csr + bytes - p->store > p->size) + do_realloc(p); + + { + unsigned char *csr = p->csr; + p->csr += bytes; + return csr; + } +} + + + +static void emit_1b( struct x86_function *p, char b0 ) +{ + char *csr = (char *)reserve(p, 1); + *csr = b0; +} + +static void emit_1i( struct x86_function *p, int i0 ) +{ + int *icsr = (int *)reserve(p, sizeof(i0)); + *icsr = i0; +} + +static void emit_1ub( struct x86_function *p, unsigned char b0 ) +{ + unsigned char *csr = reserve(p, 1); + *csr++ = b0; +} + +static void emit_2ub( struct x86_function *p, 
unsigned char b0, unsigned char b1 ) +{ + unsigned char *csr = reserve(p, 2); + *csr++ = b0; + *csr++ = b1; +} + +static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) +{ + unsigned char *csr = reserve(p, 3); + *csr++ = b0; + *csr++ = b1; + *csr++ = b2; +} + + +/* Build a modRM byte + possible displacement. No treatment of SIB + * indexing. BZZT - no way to encode an absolute address. + */ +static void emit_modrm( struct x86_function *p, + struct x86_reg reg, + struct x86_reg regmem ) +{ + unsigned char val = 0; + + assert(reg.mod == mod_REG); + + val |= regmem.mod << 6; /* mod field */ + val |= reg.idx << 3; /* reg field */ + val |= regmem.idx; /* r/m field */ + + emit_1ub(p, val); + + /* Oh-oh we've stumbled into the SIB thing. + */ + if (regmem.file == file_REG32 && + regmem.idx == reg_SP) { + emit_1ub(p, 0x24); /* simplistic! */ + } + + switch (regmem.mod) { + case mod_REG: + case mod_INDIRECT: + break; + case mod_DISP8: + emit_1b(p, regmem.disp); + break; + case mod_DISP32: + emit_1i(p, regmem.disp); + break; + default: + assert(0); + break; + } +} + + +static void emit_modrm_noreg( struct x86_function *p, + unsigned op, + struct x86_reg regmem ) +{ + struct x86_reg dummy = x86_make_reg(file_REG32, op); + emit_modrm(p, dummy, regmem); +} + +/* Many x86 instructions have two opcodes to cope with the situations + * where the destination is a register or memory reference + * respectively. This function selects the correct opcode based on + * the arguments presented. 
+ */ +static void emit_op_modrm( struct x86_function *p, + unsigned char op_dst_is_reg, + unsigned char op_dst_is_mem, + struct x86_reg dst, + struct x86_reg src ) +{ + switch (dst.mod) { + case mod_REG: + emit_1ub(p, op_dst_is_reg); + emit_modrm(p, dst, src); + break; + case mod_INDIRECT: + case mod_DISP32: + case mod_DISP8: + assert(src.mod == mod_REG); + emit_1ub(p, op_dst_is_mem); + emit_modrm(p, src, dst); + break; + default: + assert(0); + break; + } +} + + + + + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ) +{ + struct x86_reg reg; + + reg.file = file; + reg.idx = idx; + reg.mod = mod_REG; + reg.disp = 0; + + return reg; +} + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ) +{ + assert(reg.file == file_REG32); + + if (reg.mod == mod_REG) + reg.disp = disp; + else + reg.disp += disp; + + if (reg.disp == 0) + reg.mod = mod_INDIRECT; + else if (reg.disp <= 127 && reg.disp >= -128) + reg.mod = mod_DISP8; + else + reg.mod = mod_DISP32; + + return reg; +} + +struct x86_reg x86_deref( struct x86_reg reg ) +{ + return x86_make_disp(reg, 0); +} + +struct x86_reg x86_get_base_reg( struct x86_reg reg ) +{ + return x86_make_reg( reg.file, reg.idx ); +} + +unsigned char *x86_get_label( struct x86_function *p ) +{ + return p->csr; +} + + + +/*********************************************************************** + * x86 instructions + */ + + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + unsigned char *label ) +{ + int offset = label - (x86_get_label(p) + 2); + + if (offset <= 127 && offset >= -128) { + emit_1ub(p, 0x70 + cc); + emit_1b(p, (char) offset); + } + else { + offset = label - (x86_get_label(p) + 6); + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, offset); + } +} + +/* Always use a 32bit offset for forward jumps: + */ +unsigned char *x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) +{ + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, 
0); + return x86_get_label(p); +} + +unsigned char *x86_jmp_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe9); + emit_1i(p, 0); + return x86_get_label(p); +} + +unsigned char *x86_call_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe8); + emit_1i(p, 0); + return x86_get_label(p); +} + +/* Fixup offset from forward jump: + */ +void x86_fixup_fwd_jump( struct x86_function *p, + unsigned char *fixup ) +{ + *(int *)(fixup - 4) = x86_get_label(p) - fixup; +} + +void x86_jmp( struct x86_function *p, unsigned char *label) +{ + emit_1ub(p, 0xe9); + emit_1i(p, label - x86_get_label(p) - 4); +} + +#if 0 +/* This doesn't work once we start reallocating & copying the + * generated code on buffer fills, because the call is relative to the + * current pc. + */ +void x86_call( struct x86_function *p, void (*label)()) +{ + emit_1ub(p, 0xe8); + emit_1i(p, cptr(label) - x86_get_label(p) - 4); +} +#else +void x86_call( struct x86_function *p, struct x86_reg reg) +{ + emit_1ub(p, 0xff); + emit_modrm(p, reg, reg); +} +#endif + + +/* michal: + * Temporary. As I need immediate operands, and dont want to mess with the codegen, + * I load the immediate into general purpose register and use it. 
+ */ +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + assert(dst.mod == mod_REG); + emit_1ub(p, 0xb8 + dst.idx); + emit_1i(p, imm); +} + +void x86_push( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x50 + reg.idx); + p->stack_offset += 4; +} + +void x86_pop( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x58 + reg.idx); + p->stack_offset -= 4; +} + +void x86_inc( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x40 + reg.idx); +} + +void x86_dec( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x48 + reg.idx); +} + +void x86_ret( struct x86_function *p ) +{ + emit_1ub(p, 0xc3); +} + +void x86_sahf( struct x86_function *p ) +{ + emit_1ub(p, 0x9e); +} + +void x86_mov( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x8b, 0x89, dst, src ); +} + +void x86_xor( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x33, 0x31, dst, src ); +} + +void x86_cmp( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x3b, 0x39, dst, src ); +} + +void x86_lea( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x8d); + emit_modrm( p, dst, src ); +} + +void x86_test( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x85); + emit_modrm( p, dst, src ); +} + +void x86_add( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm(p, 0x03, 0x01, dst, src ); +} + +void x86_mul( struct x86_function *p, + struct x86_reg src ) +{ + assert (src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); +} + +void x86_sub( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + 
emit_op_modrm(p, 0x2b, 0x29, dst, src ); +} + +void x86_or( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x0b, 0x09, dst, src ); +} + +void x86_and( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x23, 0x21, dst, src ); +} + + + +/*********************************************************************** + * SSE instructions + */ + + +void sse_movss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0xF3, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movaps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x28, 0x29, dst, src ); +} + +void sse_movups( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ +} + +void sse_movlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ +} + +void sse_maxps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_maxss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_divss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5E); + emit_modrm( p, dst, src ); +} + +void sse_minps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5D); + emit_modrm( p, 
dst, src ); +} + +void sse_subps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5C); + emit_modrm( p, dst, src ); +} + +void sse_mulps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_mulss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_addps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_addss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_andnps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x55); + emit_modrm( p, dst, src ); +} + +void sse_andps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x54); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); + +} + +void sse_movhlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x12); + emit_modrm( p, dst, src ); +} + +void sse_movlhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x16); + emit_modrm( p, dst, src ); +} + +void sse_orps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x56); + emit_modrm( p, dst, src ); +} + 
+void sse_xorps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x57); + emit_modrm( p, dst, src ); +} + +void sse_cvtps2pi( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_XMM || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x2d); + emit_modrm( p, dst, src ); +} + + +/* Shufps can also be used to implement a reduced swizzle when dest == + * arg0. + */ +void sse_shufps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_2ub(p, X86_TWOB, 0xC6); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse_cmpps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char cc) +{ + emit_2ub(p, X86_TWOB, 0xC2); + emit_modrm(p, dest, arg0); + emit_1ub(p, cc); +} + +void sse_pmovmskb( struct x86_function *p, + struct x86_reg dest, + struct x86_reg src) +{ + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dest, src); +} + +/*********************************************************************** + * SSE2 instructions + */ + +/** + * Perform a reduced swizzle: + */ +void sse2_pshufd( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x70); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse2_cvttps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); + emit_modrm( p, dst, src ); +} + +void sse2_cvtps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x5B); + emit_modrm( p, dst, src ); +} + +void sse2_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x6B); + emit_modrm( p, dst, src ); +} + +void sse2_packsswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ 
+ emit_3ub(p, 0x66, X86_TWOB, 0x63); + emit_modrm( p, dst, src ); +} + +void sse2_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void sse2_rcpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_rcpss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0x66, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + + + + +/*********************************************************************** + * x87 instructions + */ +void x87_fist( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 2, dst); +} + +void x87_fistp( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 3, dst); +} + +void x87_fild( struct x86_function *p, struct x86_reg arg ) +{ + emit_1ub(p, 0xdf); + emit_modrm_noreg(p, 0, arg); +} + +void x87_fldz( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xee); +} + + +void x87_fldcw( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_REG32); + assert(arg.mod != mod_REG); + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 5, arg); +} + +void x87_fld1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe8); +} + +void x87_fldl2e( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xea); +} + +void x87_fldln2( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xed); +} + +void x87_fwait( struct x86_function *p ) +{ + emit_1ub(p, 0x9b); +} + +void x87_fnclex( struct x86_function *p ) +{ + emit_2ub(p, 0xdb, 0xe2); +} + +void x87_fclex( struct x86_function *p ) +{ + x87_fwait(p); + x87_fnclex(p); +} + + +static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct 
x86_reg arg, + unsigned char dst0ub0, + unsigned char dst0ub1, + unsigned char arg0ub0, + unsigned char arg0ub1, + unsigned char argmem_noreg) +{ + assert(dst.file == file_x87); + + if (arg.file == file_x87) { + if (dst.idx == 0) + emit_2ub(p, dst0ub0, dst0ub1+arg.idx); + else if (arg.idx == 0) + emit_2ub(p, arg0ub0, arg0ub1+arg.idx); + else + assert(0); + } + else if (dst.idx == 0) { + assert(arg.file == file_REG32); + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, argmem_noreg, arg); + } + else + assert(0); +} + +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc8, + 0xdc, 0xc8, + 4); +} + +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe0, + 0xdc, 0xe8, + 4); +} + +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe8, + 0xdc, 0xe0, + 5); +} + +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc0, + 0xdc, 0xc0, + 0); +} + +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf0, + 0xdc, 0xf8, + 6); +} + +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf8, + 0xdc, 0xf0, + 7); +} + +void x87_fmulp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc8+dst.idx); +} + +void x87_fsubp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe8+dst.idx); +} + +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe0+dst.idx); +} + +void x87_faddp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == 
file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc0+dst.idx); +} + +void x87_fdivp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf8+dst.idx); +} + +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf0+dst.idx); +} + +void x87_fucom( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe0+arg.idx); +} + +void x87_fucomp( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe8+arg.idx); +} + +void x87_fucompp( struct x86_function *p ) +{ + emit_2ub(p, 0xda, 0xe9); +} + +void x87_fxch( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xd9, 0xc8+arg.idx); +} + +void x87_fabs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe1); +} + +void x87_fchs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe0); +} + +void x87_fcos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xff); +} + + +void x87_fprndint( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfc); +} + +void x87_fscale( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfd); +} + +void x87_fsin( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfe); +} + +void x87_fsincos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfb); +} + +void x87_fsqrt( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfa); +} + +void x87_fxtract( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf4); +} + +/* st0 = (2^st0)-1 + * + * Restrictions: -1.0 <= st0 <= 1.0 + */ +void x87_f2xm1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf0); +} + +/* st1 = st1 * log2(st0); + * pop_stack; + */ +void x87_fyl2x( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf1); +} + +/* st1 = st1 * log2(st0 + 1.0); + * pop_stack; + * + * A fast operation, with restrictions: -.29 < st0 < .29 + */ +void x87_fyl2xp1( struct 
x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf9); +} + + +void x87_fld( struct x86_function *p, struct x86_reg arg ) +{ + if (arg.file == file_x87) + emit_2ub(p, 0xd9, 0xc0 + arg.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 0, arg); + } +} + +void x87_fst( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fstp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 3, dst); + } +} + +void x87_fcom( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fcomp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 3, dst); + } +} + + +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_REG32); + + if (dst.idx == reg_AX && + dst.mod == mod_REG) + emit_2ub(p, 0xdf, 0xe0); + else { + emit_1ub(p, 0xdd); + emit_modrm_noreg(p, 7, dst); + } +} + + + + +/*********************************************************************** + * MMX instructions + */ + +void mmx_emms( struct x86_function *p ) +{ + assert(p->need_emms); + emit_2ub(p, 0x0f, 0x77); + p->need_emms = 0; +} + +void mmx_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x6b); + emit_modrm( p, dst, src ); +} + +void mmx_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + 
emit_2ub(p, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void mmx_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + +void mmx_movq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6f, 0x7f, dst, src ); +} + + +/*********************************************************************** + * Helper functions + */ + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity: + */ +struct x86_reg x86_fn_arg( struct x86_function *p, + unsigned arg ) +{ + return x86_make_disp(x86_make_reg(file_REG32, reg_SP), + p->stack_offset + arg * 4); /* ??? */ +} + + +void x86_init_func( struct x86_function *p ) +{ + p->size = 0; + p->store = NULL; + p->csr = p->store; +} + +void x86_init_func_size( struct x86_function *p, unsigned code_size ) +{ + p->size = code_size; + p->store = rtasm_exec_malloc(code_size); + p->csr = p->store; +} + +void x86_release_func( struct x86_function *p ) +{ + rtasm_exec_free(p->store); + p->store = NULL; + p->csr = NULL; + p->size = 0; +} + + +void (*x86_get_func( struct x86_function *p ))(void) +{ + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + return (void (*)(void)) (unsigned long) p->store; +} + +#else + +void x86sse_dummy( void ) +{ +} + +#endif -- cgit v1.2.3 From d2f6c9ab10656f6ecda131a6785a60565026d249 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 12:05:32 +0900 Subject: Add copyright headers to all rtasm source files. 
--- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 23 +++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 28 +++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 3c885a9fff..b332192a62 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -1,3 +1,26 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + #if defined(__i386__) || defined(__386__) #include "pipe/p_compiler.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index c2aa416492..e4576001bf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -1,6 +1,28 @@ - -#ifndef _X86SSE_H_ -#define _X86SSE_H_ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef _RTASM_X86SSE_H_ +#define _RTASM_X86SSE_H_ #if defined(__i386__) || defined(__386__) -- cgit v1.2.3 From 5480a6bc13a555f99a89fc801cfe153182697dda Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 18:57:25 +0900 Subject: Fix windows build. --- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 3 ++- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 9c78fa5626..300c1c2d9d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -33,6 +33,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" +#include "pipe/p_util.h" #include "rtasm_execmem.h" @@ -118,7 +119,7 @@ rtasm_exec_free(void *addr) */ void * -rtasm_exec_malloc(GLuint size) +rtasm_exec_malloc(size_t size) { return MALLOC( size ); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index b332192a62..dcbf76f600 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -21,7 +21,7 @@ * **************************************************************************/ -#if defined(__i386__) || defined(__386__) +#if defined(__i386__) || defined(__386__) || defined(i386) #include "pipe/p_compiler.h" #include "pipe/p_debug.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index e4576001bf..606b41eb35 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -24,7 +24,7 @@ #ifndef _RTASM_X86SSE_H_ #define _RTASM_X86SSE_H_ -#if defined(__i386__) || defined(__386__) +#if 
defined(__i386__) || defined(__386__) || defined(i386) /* It is up to the caller to ensure that instructions issued are * suitable for the host cpu. There are no checks made in this module -- cgit v1.2.3 From 57060bc1fa82e4e93d2affafecd98219be2f991f Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 20 Feb 2008 22:10:27 +0100 Subject: gallium: Silence compiler warnings on Windows. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index dcbf76f600..4d33950e99 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -25,6 +25,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" +#include "pipe/p_pointer.h" #include "rtasm_execmem.h" #include "rtasm_x86sse.h" @@ -34,7 +35,7 @@ static unsigned char *cptr( void (*label)() ) { - return (unsigned char *)(unsigned long)label; + return (unsigned char *) label; } @@ -46,7 +47,7 @@ static void do_realloc( struct x86_function *p ) p->csr = p->store; } else { - unsigned used = p->csr - p->store; + uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); unsigned char *tmp = p->store; p->size *= 2; p->store = rtasm_exec_malloc(p->size); @@ -60,7 +61,7 @@ static void do_realloc( struct x86_function *p ) */ static unsigned char *reserve( struct x86_function *p, int bytes ) { - if (p->csr + bytes - p->store > p->size) + if (p->csr + bytes - p->store > (int) p->size) do_realloc(p); { @@ -135,7 +136,7 @@ static void emit_modrm( struct x86_function *p, case mod_INDIRECT: break; case mod_DISP8: - emit_1b(p, regmem.disp); + emit_1b(p, (char) regmem.disp); break; case mod_DISP32: emit_1i(p, regmem.disp); @@ -251,14 +252,14 @@ void x86_jcc( struct x86_function *p, enum x86_cc cc, unsigned char *label ) { - int offset = label - 
(x86_get_label(p) + 2); + intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2); if (offset <= 127 && offset >= -128) { emit_1ub(p, 0x70 + cc); emit_1b(p, (char) offset); } else { - offset = label - (x86_get_label(p) + 6); + offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6); emit_2ub(p, 0x0f, 0x80 + cc); emit_1i(p, offset); } @@ -293,13 +294,13 @@ unsigned char *x86_call_forward( struct x86_function *p) void x86_fixup_fwd_jump( struct x86_function *p, unsigned char *fixup ) { - *(int *)(fixup - 4) = x86_get_label(p) - fixup; + *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup ); } void x86_jmp( struct x86_function *p, unsigned char *label) { emit_1ub(p, 0xe9); - emit_1i(p, label - x86_get_label(p) - 4); + emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); } #if 0 @@ -1207,7 +1208,7 @@ void (*x86_get_func( struct x86_function *p ))(void) { if (DISASSEM && p->store) debug_printf("disassemble %p %p\n", p->store, p->csr); - return (void (*)(void)) (unsigned long) p->store; + return (void (*)(void)) p->store; } #else -- cgit v1.2.3 From 4d184cc33131b440f9aafbcdd2d657050411db49 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 11 Apr 2008 13:20:52 -0600 Subject: gallium: fix broken x86_call() --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 4d33950e99..aea8b28e58 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -317,7 +317,7 @@ void x86_call( struct x86_function *p, void (*label)()) void x86_call( struct x86_function *p, struct x86_reg reg) { emit_1ub(p, 0xff); - emit_modrm(p, reg, reg); + emit_modrm_noreg(p, 2, reg); } #endif -- cgit v1.2.3 From 
5b97c762ed9882dd922f48c2fbf13b14ad86a96e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 17:32:39 +0100 Subject: rtasm: add a couple more insns, clean up x86_mul --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 34 ++++++++++++++++++++++++------ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 ++ 2 files changed, 29 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index aea8b28e58..5c25fa155d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -33,11 +33,6 @@ #define DISASSEM 0 #define X86_TWOB 0x0f -static unsigned char *cptr( void (*label)() ) -{ - return (unsigned char *) label; -} - static void do_realloc( struct x86_function *p ) { @@ -304,6 +299,11 @@ void x86_jmp( struct x86_function *p, unsigned char *label) } #if 0 +static unsigned char *cptr( void (*label)() ) +{ + return (unsigned char *) label; +} + /* This doesn't work once we start reallocating & copying the * generated code on buffer fills, because the call is relative to the * current pc. @@ -417,11 +417,14 @@ void x86_add( struct x86_function *p, emit_op_modrm(p, 0x03, 0x01, dst, src ); } +/* Calculate EAX * src, results in EDX:EAX. 
+ */ void x86_mul( struct x86_function *p, struct x86_reg src ) { - assert (src.file == file_REG32 && src.mod == mod_REG); - emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); +// assert (src.file == file_REG32 && src.mod == mod_REG); + emit_1ub(p, 0xf7); + emit_modrm_noreg(p, 4, src ); } void x86_sub( struct x86_function *p, @@ -646,6 +649,14 @@ void sse_cvtps2pi( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse2_cvtdq2ps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5b); + emit_modrm( p, dst, src ); +} + /* Shufps can also be used to implement a reduced swizzle when dest == * arg0. @@ -735,6 +746,15 @@ void sse2_packuswb( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse2_punpcklbw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x60); + emit_modrm( p, dst, src ); +} + + void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 606b41eb35..dfde661f46 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -165,6 +165,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg sr void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); @@ -202,6 +203,7 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src void sse_shufps( struct 
x86_function *p, struct x86_reg dest, struct x86_reg arg0, unsigned char shuf ); void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); +void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From 363f7abf2000c1cf5993ae8f83ba81b2054bf6e0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 18:30:41 +0100 Subject: rtasm: add x86_imul --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 11 ++++++++++- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 5c25fa155d..7f8cc23d15 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -422,11 +422,20 @@ void x86_add( struct x86_function *p, void x86_mul( struct x86_function *p, struct x86_reg src ) { -// assert (src.file == file_REG32 && src.mod == mod_REG); emit_1ub(p, 0xf7); emit_modrm_noreg(p, 4, src ); } + +void x86_imul( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0xAF); + emit_modrm(p, dst, src); +} + + void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index dfde661f46..5e99ceea70 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -213,6 +213,7 @@ void x86_inc( struct x86_function *p, struct x86_reg reg ); void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_mul( 
struct x86_function *p, struct x86_reg src ); +void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_pop( struct x86_function *p, struct x86_reg reg ); void x86_push( struct x86_function *p, struct x86_reg reg ); -- cgit v1.2.3 From b6c9d2ef2cfadbbe3e7aa94f21fd0da36d089952 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 12:37:41 +0100 Subject: rtasm: add dump facility for x86 (from tgsi_sse2.c) --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 267 ++++++++++++++++++++++++++--- 1 file changed, 243 insertions(+), 24 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 7f8cc23d15..f2c08c96a6 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -34,6 +34,116 @@ #define X86_TWOB 0x0f +#define DUMP_SSE 0 + +#if DUMP_SSE + +static void +_print_reg( + struct x86_reg reg ) +{ + if (reg.mod != mod_REG) + debug_printf( "[" ); + + switch( reg.file ) { + case file_REG32: + switch( reg.idx ) { + case reg_AX: debug_printf( "EAX" ); break; + case reg_CX: debug_printf( "ECX" ); break; + case reg_DX: debug_printf( "EDX" ); break; + case reg_BX: debug_printf( "EBX" ); break; + case reg_SP: debug_printf( "ESP" ); break; + case reg_BP: debug_printf( "EBP" ); break; + case reg_SI: debug_printf( "ESI" ); break; + case reg_DI: debug_printf( "EDI" ); break; + } + break; + case file_MMX: + debug_printf( "MMX%u", reg.idx ); + break; + case file_XMM: + debug_printf( "XMM%u", reg.idx ); + break; + case file_x87: + debug_printf( "fp%u", reg.idx ); + break; + } + + if (reg.mod == mod_DISP8 || + reg.mod == mod_DISP32) + debug_printf("+%d", reg.disp); + + if (reg.mod != mod_REG) + debug_printf( "]" ); +} + +static void +_fill( + const char *op ) +{ + unsigned count = 10 - strlen( op ); + 
+ while( count-- ) { + debug_printf( " " ); + } +} + +#define DUMP_START() debug_printf( "\nsse-dump start ----------------" ) +#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" ) +#define DUMP( OP ) debug_printf( "\n%s", OP ) + +#define DUMP_I( OP, I ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + debug_printf( "%u", I ); \ +} while( 0 ) + +#define DUMP_R( OP, R0 ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + _print_reg( R0 ); \ +} while( 0 ) + +#define DUMP_RR( OP, R0, R1 ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + _print_reg( R0 ); \ + debug_printf( ", " ); \ + _print_reg( R1 ); \ +} while( 0 ) + +#define DUMP_RI( OP, R0, I ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + _print_reg( R0 ); \ + debug_printf( ", " ); \ + debug_printf( "%u", I ); \ +} while( 0 ) + +#define DUMP_RRI( OP, R0, R1, I ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + _print_reg( R0 ); \ + debug_printf( ", " ); \ + _print_reg( R1 ); \ + debug_printf( ", " ); \ + debug_printf( "%u", I ); \ +} while( 0 ) + +#else + +#define DUMP_START() +#define DUMP_END() +#define DUMP( OP ) +#define DUMP_I( OP, I ) +#define DUMP_R( OP, R0 ) +#define DUMP_RR( OP, R0, R1 ) +#define DUMP_RI( OP, R0, I ) +#define DUMP_RRI( OP, R0, R1, I ) + +#endif + + static void do_realloc( struct x86_function *p ) { if (p->size == 0) { @@ -272,6 +382,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p, unsigned char *x86_jmp_forward( struct x86_function *p) { + DUMP( __FUNCTION__ ); emit_1ub(p, 0xe9); emit_1i(p, 0); return x86_get_label(p); @@ -279,6 +390,8 @@ unsigned char *x86_jmp_forward( struct x86_function *p) unsigned char *x86_call_forward( struct x86_function *p) { + DUMP( __FUNCTION__ ); + emit_1ub(p, 0xe8); emit_1i(p, 0); return x86_get_label(p); @@ -294,6 +407,7 @@ void x86_fixup_fwd_jump( struct x86_function *p, void x86_jmp( struct x86_function *p, unsigned char *label) { + DUMP_I( __FUNCTION__, label ); emit_1ub(p, 
0xe9); emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); } @@ -310,12 +424,14 @@ static unsigned char *cptr( void (*label)() ) */ void x86_call( struct x86_function *p, void (*label)()) { + DUMP_I( __FUNCTION__, label ); emit_1ub(p, 0xe8); emit_1i(p, cptr(label) - x86_get_label(p) - 4); } #else void x86_call( struct x86_function *p, struct x86_reg reg) { + DUMP_R( __FUNCTION__, reg ); emit_1ub(p, 0xff); emit_modrm_noreg(p, 2, reg); } @@ -328,6 +444,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg) */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { + DUMP_RI( __FUNCTION__, dst, imm ); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); @@ -336,6 +453,7 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) void x86_push( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( __FUNCTION__, reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x50 + reg.idx); p->stack_offset += 4; @@ -344,6 +462,7 @@ void x86_push( struct x86_function *p, void x86_pop( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( __FUNCTION__, reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x58 + reg.idx); p->stack_offset -= 4; @@ -352,6 +471,7 @@ void x86_pop( struct x86_function *p, void x86_inc( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( __FUNCTION__, reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x40 + reg.idx); } @@ -359,17 +479,20 @@ void x86_inc( struct x86_function *p, void x86_dec( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( __FUNCTION__, reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x48 + reg.idx); } void x86_ret( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_1ub(p, 0xc3); } void x86_sahf( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_1ub(p, 0x9e); } @@ -377,6 +500,7 @@ void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x8b, 
0x89, dst, src ); } @@ -384,6 +508,7 @@ void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x33, 0x31, dst, src ); } @@ -391,6 +516,7 @@ void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x3b, 0x39, dst, src ); } @@ -398,6 +524,7 @@ void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_1ub(p, 0x8d); emit_modrm( p, dst, src ); } @@ -406,6 +533,7 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_1ub(p, 0x85); emit_modrm( p, dst, src ); } @@ -414,6 +542,7 @@ void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm(p, 0x03, 0x01, dst, src ); } @@ -422,6 +551,7 @@ void x86_add( struct x86_function *p, void x86_mul( struct x86_function *p, struct x86_reg src ) { + DUMP_R( __FUNCTION__, src ); emit_1ub(p, 0xf7); emit_modrm_noreg(p, 4, src ); } @@ -431,6 +561,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0xAF); emit_modrm(p, dst, src); } @@ -440,6 +571,7 @@ void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm(p, 0x2b, 0x29, dst, src ); } @@ -447,6 +579,7 @@ void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x0b, 0x09, dst, src ); } @@ -454,6 +587,7 @@ void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x23, 0x21, dst, src ); } @@ -468,6 +602,7 @@ void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( 
__FUNCTION__, dst, src ); emit_2ub(p, 0xF3, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -476,6 +611,7 @@ void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x28, 0x29, dst, src ); } @@ -484,6 +620,7 @@ void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -492,6 +629,7 @@ void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ @@ -501,6 +639,7 @@ void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ @@ -510,6 +649,7 @@ void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -518,6 +658,7 @@ void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -526,6 +667,7 @@ void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5E); emit_modrm( p, dst, src ); } @@ -534,6 +676,7 @@ void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x5D); emit_modrm( p, dst, src ); } @@ -542,6 +685,7 @@ void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, 
src ); emit_2ub(p, X86_TWOB, 0x5C); emit_modrm( p, dst, src ); } @@ -550,6 +694,7 @@ void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -558,6 +703,7 @@ void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -566,6 +712,7 @@ void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -574,6 +721,7 @@ void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -582,6 +730,7 @@ void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x55); emit_modrm( p, dst, src ); } @@ -590,6 +739,7 @@ void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x54); emit_modrm( p, dst, src ); } @@ -598,6 +748,7 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x52); emit_modrm( p, dst, src ); } @@ -606,6 +757,7 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x52); emit_modrm( p, dst, src ); @@ -615,6 +767,7 @@ void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x12); emit_modrm( p, dst, src ); @@ -624,6 +777,7 @@ void sse_movlhps( struct x86_function *p, struct x86_reg dst, 
struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x16); emit_modrm( p, dst, src ); @@ -633,6 +787,7 @@ void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x56); emit_modrm( p, dst, src ); } @@ -641,6 +796,7 @@ void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x57); emit_modrm( p, dst, src ); } @@ -649,6 +805,7 @@ void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.file == file_MMX && (src.file == file_XMM || src.mod != mod_REG)); @@ -662,6 +819,7 @@ void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x5b); emit_modrm( p, dst, src ); } @@ -671,31 +829,34 @@ void sse2_cvtdq2ps( struct x86_function *p, * arg0. 
*/ void sse_shufps( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, + struct x86_reg dst, + struct x86_reg src, unsigned char shuf) { + DUMP_RRI( __FUNCTION__, dst, src, shuf ); emit_2ub(p, X86_TWOB, 0xC6); - emit_modrm(p, dest, arg0); + emit_modrm(p, dst, src); emit_1ub(p, shuf); } void sse_cmpps( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, + struct x86_reg dst, + struct x86_reg src, unsigned char cc) { + DUMP_RRI( "CMPPS", dst, src, cc ); emit_2ub(p, X86_TWOB, 0xC2); - emit_modrm(p, dest, arg0); + emit_modrm(p, dst, src); emit_1ub(p, cc); } void sse_pmovmskb( struct x86_function *p, - struct x86_reg dest, + struct x86_reg dst, struct x86_reg src) { - emit_3ub(p, 0x66, X86_TWOB, 0xD7); - emit_modrm(p, dest, src); + DUMP_RR( __FUNCTION__, dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dst, src); } /*********************************************************************** @@ -706,12 +867,13 @@ void sse_pmovmskb( struct x86_function *p, * Perform a reduced swizzle: */ void sse2_pshufd( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, + struct x86_reg dst, + struct x86_reg src, unsigned char shuf) { + DUMP_RRI( __FUNCTION__, dst, src, shuf ); emit_3ub(p, 0x66, X86_TWOB, 0x70); - emit_modrm(p, dest, arg0); + emit_modrm(p, dst, src); emit_1ub(p, shuf); } @@ -719,6 +881,7 @@ void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); emit_modrm( p, dst, src ); } @@ -727,6 +890,7 @@ void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x5B); emit_modrm( p, dst, src ); } @@ -735,6 +899,7 @@ void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x6B); emit_modrm( p, dst, src ); } @@ -743,6 +908,7 
@@ void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x63); emit_modrm( p, dst, src ); } @@ -751,6 +917,7 @@ void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x67); emit_modrm( p, dst, src ); } @@ -759,6 +926,7 @@ void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x60); emit_modrm( p, dst, src ); } @@ -768,6 +936,7 @@ void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -776,6 +945,7 @@ void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -784,6 +954,7 @@ void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, 0x66, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src ); } @@ -796,30 +967,35 @@ void sse2_movd( struct x86_function *p, */ void x87_fist( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 2, dst); } void x87_fistp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 3, dst); } void x87_fild( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); emit_1ub(p, 0xdf); emit_modrm_noreg(p, 0, arg); } void x87_fldz( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xee); } void x87_fldcw( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); assert(arg.file == file_REG32); assert(arg.mod != mod_REG); emit_1ub(p, 0xd9); @@ -828,26 
+1004,31 @@ void x87_fldcw( struct x86_function *p, struct x86_reg arg ) void x87_fld1( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xe8); } void x87_fldl2e( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xea); } void x87_fldln2( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xed); } void x87_fwait( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_1ub(p, 0x9b); } void x87_fnclex( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xdb, 0xe2); } @@ -884,49 +1065,55 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86 assert(0); } -void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xc8, 0xdc, 0xc8, 4); } -void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xe0, 0xdc, 0xe8, 4); } -void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xe8, 0xdc, 0xe0, 5); } -void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xc0, 0xdc, 0xc0, 0); } -void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + 
DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xf0, 0xdc, 0xf8, 6); } -void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xf8, 0xdc, 0xf0, 7); @@ -934,6 +1121,7 @@ void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) void x87_fmulp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc8+dst.idx); @@ -941,6 +1129,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) void x87_fsubp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe8+dst.idx); @@ -948,6 +1137,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe0+dst.idx); @@ -955,6 +1145,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) void x87_faddp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc0+dst.idx); @@ -962,6 +1153,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) void x87_fdivp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf8+dst.idx); @@ -969,6 +1161,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst ) void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf0+dst.idx); @@ -976,70 +1169,83 @@ void 
x87_fdivrp( struct x86_function *p, struct x86_reg dst ) void x87_fucom( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe0+arg.idx); } void x87_fucomp( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe8+arg.idx); } void x87_fucompp( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xda, 0xe9); } void x87_fxch( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); assert(arg.file == file_x87); emit_2ub(p, 0xd9, 0xc8+arg.idx); } void x87_fabs( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xe1); } void x87_fchs( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xe0); } void x87_fcos( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xff); } void x87_fprndint( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfc); } void x87_fscale( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfd); } void x87_fsin( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfe); } void x87_fsincos( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfb); } void x87_fsqrt( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfa); } void x87_fxtract( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xf4); } @@ -1049,6 +1255,7 @@ void x87_fxtract( struct x86_function *p ) */ void x87_f2xm1( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xf0); } @@ -1057,6 +1264,7 @@ void x87_f2xm1( struct x86_function *p ) */ void x87_fyl2x( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xf1); } @@ -1067,12 +1275,14 @@ void x87_fyl2x( struct x86_function *p ) */ void x87_fyl2xp1( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xf9); } void x87_fld( struct x86_function 
*p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); if (arg.file == file_x87) emit_2ub(p, 0xd9, 0xc0 + arg.idx); else { @@ -1083,6 +1293,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) void x87_fst( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd0 + dst.idx); else { @@ -1093,6 +1304,7 @@ void x87_fst( struct x86_function *p, struct x86_reg dst ) void x87_fstp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd8 + dst.idx); else { @@ -1103,6 +1315,7 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) void x87_fcom( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd0 + dst.idx); else { @@ -1113,6 +1326,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) void x87_fcomp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd8 + dst.idx); else { @@ -1124,6 +1338,7 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_REG32); if (dst.idx == reg_AX && @@ -1153,6 +1368,7 @@ void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1166,6 +1382,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1179,6 +1396,7 @@ void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src 
); @@ -1188,6 +1406,7 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6f, 0x7f, dst, src ); -- cgit v1.2.3 From b17e123a8f20239e8e1fc6816ccf115d9ec57471 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 19:09:38 +0100 Subject: rtasm: propogate errors in x86 emit --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 34 +++++++++++++++++++++++++----- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 + 2 files changed, 30 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index f2c08c96a6..c2fe0e40f5 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -146,7 +146,10 @@ _fill( static void do_realloc( struct x86_function *p ) { - if (p->size == 0) { + if (p->store == p->error_overflow) { + p->csr = p->store; + } + else if (p->size == 0) { p->size = 1024; p->store = rtasm_exec_malloc(p->size); p->csr = p->store; @@ -156,10 +159,22 @@ static void do_realloc( struct x86_function *p ) unsigned char *tmp = p->store; p->size *= 2; p->store = rtasm_exec_malloc(p->size); - memcpy(p->store, tmp, used); - p->csr = p->store + used; + + if (p->store) { + memcpy(p->store, tmp, used); + p->csr = p->store + used; + } + else { + p->csr = p->store; + } + rtasm_exec_free(tmp); } + + if (p->store == NULL) { + p->store = p->csr = p->error_overflow; + p->size = 4; + } } /* Emit bytes to the instruction stream: @@ -1440,12 +1455,17 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size ) { p->size = code_size; p->store = rtasm_exec_malloc(code_size); + if (p->store == NULL) { + p->store = p->error_overflow; + } p->csr = p->store; } void x86_release_func( struct x86_function *p ) { - rtasm_exec_free(p->store); + if (p->store && p->store != 
p->error_overflow) + rtasm_exec_free(p->store); + p->store = NULL; p->csr = NULL; p->size = 0; @@ -1456,7 +1476,11 @@ void (*x86_get_func( struct x86_function *p ))(void) { if (DISASSEM && p->store) debug_printf("disassemble %p %p\n", p->store, p->csr); - return (void (*)(void)) p->store; + + if (p->store == p->error_overflow) + return (void (*)(void)) NULL; + else + return (void (*)(void)) p->store; } #else diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 5e99ceea70..695a1cef4e 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -43,6 +43,7 @@ struct x86_function { unsigned char *csr; unsigned stack_offset; int need_emms; + unsigned char error_overflow[4]; const char *fn; }; -- cgit v1.2.3 From 73c2711bb186692b866720058a09f5eb05950213 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 19:43:53 +0100 Subject: rtasm: clean up debug dumping a little --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 286 ++++++++++++++--------------- 1 file changed, 140 insertions(+), 146 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index c2fe0e40f5..10796c540d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -77,69 +77,60 @@ _print_reg( debug_printf( "]" ); } -static void -_fill( - const char *op ) -{ - unsigned count = 10 - strlen( op ); - while( count-- ) { - debug_printf( " " ); - } -} +#define DUMP_START() debug_printf( "\n" ) +#define DUMP_END() debug_printf( "\n" ) -#define DUMP_START() debug_printf( "\nsse-dump start ----------------" ) -#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" ) -#define DUMP( OP ) debug_printf( "\n%s", OP ) +#define DUMP() do { \ + const char *foo = __FUNCTION__; \ + while (*foo && *foo != '_') \ + foo++; \ + if (*foo) \ 
+ foo++; \ + debug_printf( "\n% 15s ", foo ); \ +} while (0) -#define DUMP_I( OP, I ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_I( I ) do { \ + DUMP(); \ debug_printf( "%u", I ); \ } while( 0 ) -#define DUMP_R( OP, R0 ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_R( R0 ) do { \ + DUMP(); \ _print_reg( R0 ); \ } while( 0 ) -#define DUMP_RR( OP, R0, R1 ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_RR( R0, R1 ) do { \ + DUMP(); \ _print_reg( R0 ); \ debug_printf( ", " ); \ _print_reg( R1 ); \ } while( 0 ) -#define DUMP_RI( OP, R0, I ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_RI( R0, I ) do { \ + DUMP(); \ _print_reg( R0 ); \ - debug_printf( ", " ); \ - debug_printf( "%u", I ); \ + debug_printf( ", %u", I ); \ } while( 0 ) -#define DUMP_RRI( OP, R0, R1, I ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_RRI( R0, R1, I ) do { \ + DUMP(); \ _print_reg( R0 ); \ debug_printf( ", " ); \ _print_reg( R1 ); \ - debug_printf( ", " ); \ - debug_printf( "%u", I ); \ + debug_printf( ", %u", I ); \ } while( 0 ) #else #define DUMP_START() #define DUMP_END() -#define DUMP( OP ) -#define DUMP_I( OP, I ) -#define DUMP_R( OP, R0 ) -#define DUMP_RR( OP, R0, R1 ) -#define DUMP_RI( OP, R0, I ) -#define DUMP_RRI( OP, R0, R1, I ) +#define DUMP( ) +#define DUMP_I( I ) +#define DUMP_R( R0 ) +#define DUMP_RR( R0, R1 ) +#define DUMP_RI( R0, I ) +#define DUMP_RRI( R0, R1, I ) #endif @@ -173,7 +164,7 @@ static void do_realloc( struct x86_function *p ) if (p->store == NULL) { p->store = p->csr = p->error_overflow; - p->size = 4; + p->size = sizeof(p->error_overflow); } } @@ -397,7 +388,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p, unsigned char *x86_jmp_forward( struct x86_function *p) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0xe9); emit_1i(p, 0); return x86_get_label(p); @@ -405,7 +396,7 @@ unsigned char *x86_jmp_forward( struct x86_function *p) 
unsigned char *x86_call_forward( struct x86_function *p) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0xe8); emit_1i(p, 0); @@ -422,7 +413,7 @@ void x86_fixup_fwd_jump( struct x86_function *p, void x86_jmp( struct x86_function *p, unsigned char *label) { - DUMP_I( __FUNCTION__, label ); + DUMP_I( label ); emit_1ub(p, 0xe9); emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); } @@ -439,14 +430,14 @@ static unsigned char *cptr( void (*label)() ) */ void x86_call( struct x86_function *p, void (*label)()) { - DUMP_I( __FUNCTION__, label ); + DUMP_I( label ); emit_1ub(p, 0xe8); emit_1i(p, cptr(label) - x86_get_label(p) - 4); } #else void x86_call( struct x86_function *p, struct x86_reg reg) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); emit_1ub(p, 0xff); emit_modrm_noreg(p, 2, reg); } @@ -459,7 +450,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg) */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { - DUMP_RI( __FUNCTION__, dst, imm ); + DUMP_RI( dst, imm ); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); @@ -468,7 +459,7 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) void x86_push( struct x86_function *p, struct x86_reg reg ) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x50 + reg.idx); p->stack_offset += 4; @@ -477,7 +468,7 @@ void x86_push( struct x86_function *p, void x86_pop( struct x86_function *p, struct x86_reg reg ) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x58 + reg.idx); p->stack_offset -= 4; @@ -486,7 +477,7 @@ void x86_pop( struct x86_function *p, void x86_inc( struct x86_function *p, struct x86_reg reg ) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x40 + reg.idx); } @@ -494,20 +485,20 @@ void x86_inc( struct x86_function *p, void x86_dec( struct x86_function *p, struct x86_reg reg 
) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x48 + reg.idx); } void x86_ret( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0xc3); } void x86_sahf( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0x9e); } @@ -515,7 +506,7 @@ void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x8b, 0x89, dst, src ); } @@ -523,7 +514,7 @@ void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x33, 0x31, dst, src ); } @@ -531,7 +522,7 @@ void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x3b, 0x39, dst, src ); } @@ -539,7 +530,7 @@ void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_1ub(p, 0x8d); emit_modrm( p, dst, src ); } @@ -548,7 +539,7 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_1ub(p, 0x85); emit_modrm( p, dst, src ); } @@ -557,7 +548,7 @@ void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm(p, 0x03, 0x01, dst, src ); } @@ -566,7 +557,7 @@ void x86_add( struct x86_function *p, void x86_mul( struct x86_function *p, struct x86_reg src ) { - DUMP_R( __FUNCTION__, src ); + DUMP_R( src ); emit_1ub(p, 0xf7); emit_modrm_noreg(p, 4, src ); } @@ -576,7 +567,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0xAF); emit_modrm(p, dst, src); } @@ -586,7 +577,7 @@ 
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm(p, 0x2b, 0x29, dst, src ); } @@ -594,7 +585,7 @@ void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x0b, 0x09, dst, src ); } @@ -602,7 +593,7 @@ void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x23, 0x21, dst, src ); } @@ -617,7 +608,7 @@ void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, 0xF3, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -626,7 +617,7 @@ void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x28, 0x29, dst, src ); } @@ -635,7 +626,7 @@ void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -644,7 +635,7 @@ void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ @@ -654,7 +645,7 @@ void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ @@ -664,7 +655,7 @@ void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( 
__FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -673,7 +664,7 @@ void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -682,7 +673,7 @@ void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5E); emit_modrm( p, dst, src ); } @@ -691,7 +682,7 @@ void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5D); emit_modrm( p, dst, src ); } @@ -700,7 +691,7 @@ void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5C); emit_modrm( p, dst, src ); } @@ -709,7 +700,7 @@ void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -718,7 +709,7 @@ void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -727,7 +718,7 @@ void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -736,7 +727,7 @@ void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -745,7 +736,7 @@ void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src 
) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x55); emit_modrm( p, dst, src ); } @@ -754,7 +745,7 @@ void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x54); emit_modrm( p, dst, src ); } @@ -763,7 +754,7 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x52); emit_modrm( p, dst, src ); } @@ -772,7 +763,7 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x52); emit_modrm( p, dst, src ); @@ -782,7 +773,7 @@ void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x12); emit_modrm( p, dst, src ); @@ -792,7 +783,7 @@ void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x16); emit_modrm( p, dst, src ); @@ -802,7 +793,7 @@ void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x56); emit_modrm( p, dst, src ); } @@ -811,7 +802,7 @@ void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x57); emit_modrm( p, dst, src ); } @@ -820,7 +811,7 @@ void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.file == file_MMX && (src.file == file_XMM || 
src.mod != mod_REG)); @@ -834,7 +825,7 @@ void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5b); emit_modrm( p, dst, src ); } @@ -848,7 +839,7 @@ void sse_shufps( struct x86_function *p, struct x86_reg src, unsigned char shuf) { - DUMP_RRI( __FUNCTION__, dst, src, shuf ); + DUMP_RRI( dst, src, shuf ); emit_2ub(p, X86_TWOB, 0xC6); emit_modrm(p, dst, src); emit_1ub(p, shuf); @@ -859,7 +850,7 @@ void sse_cmpps( struct x86_function *p, struct x86_reg src, unsigned char cc) { - DUMP_RRI( "CMPPS", dst, src, cc ); + DUMP_RRI( dst, src, cc ); emit_2ub(p, X86_TWOB, 0xC2); emit_modrm(p, dst, src); emit_1ub(p, cc); @@ -869,7 +860,7 @@ void sse_pmovmskb( struct x86_function *p, struct x86_reg dst, struct x86_reg src) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0xD7); emit_modrm(p, dst, src); } @@ -886,7 +877,7 @@ void sse2_pshufd( struct x86_function *p, struct x86_reg src, unsigned char shuf) { - DUMP_RRI( __FUNCTION__, dst, src, shuf ); + DUMP_RRI( dst, src, shuf ); emit_3ub(p, 0x66, X86_TWOB, 0x70); emit_modrm(p, dst, src); emit_1ub(p, shuf); @@ -896,7 +887,7 @@ void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); emit_modrm( p, dst, src ); } @@ -905,7 +896,7 @@ void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x5B); emit_modrm( p, dst, src ); } @@ -914,7 +905,7 @@ void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x6B); emit_modrm( p, dst, src ); } @@ -923,7 +914,7 @@ void sse2_packsswb( struct x86_function *p, struct x86_reg dst, 
struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x63); emit_modrm( p, dst, src ); } @@ -932,7 +923,7 @@ void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x67); emit_modrm( p, dst, src ); } @@ -941,7 +932,7 @@ void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x60); emit_modrm( p, dst, src ); } @@ -951,7 +942,7 @@ void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -960,7 +951,7 @@ void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -969,7 +960,7 @@ void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, 0x66, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src ); } @@ -982,35 +973,35 @@ void sse2_movd( struct x86_function *p, */ void x87_fist( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 2, dst); } void x87_fistp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 3, dst); } void x87_fild( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); emit_1ub(p, 0xdf); emit_modrm_noreg(p, 0, arg); } void x87_fldz( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xee); } void x87_fldcw( struct x86_function *p, struct x86_reg 
arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); assert(arg.file == file_REG32); assert(arg.mod != mod_REG); emit_1ub(p, 0xd9); @@ -1019,31 +1010,31 @@ void x87_fldcw( struct x86_function *p, struct x86_reg arg ) void x87_fld1( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xe8); } void x87_fldl2e( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xea); } void x87_fldln2( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xed); } void x87_fwait( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0x9b); } void x87_fnclex( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xdb, 0xe2); } @@ -1082,7 +1073,7 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xc8, 0xdc, 0xc8, @@ -1091,7 +1082,7 @@ void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xe0, 0xdc, 0xe8, @@ -1100,7 +1091,7 @@ void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xe8, 0xdc, 0xe0, @@ -1109,7 +1100,7 @@ void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xc0, 0xdc, 0xc0, @@ -1118,7 +1109,7 @@ void x87_fadd( struct x86_function *p, struct x86_reg dst, 
struct x86_reg src ) void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xf0, 0xdc, 0xf8, @@ -1127,7 +1118,7 @@ void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xf8, 0xdc, 0xf0, @@ -1136,7 +1127,7 @@ void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fmulp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc8+dst.idx); @@ -1144,7 +1135,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) void x87_fsubp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe8+dst.idx); @@ -1152,7 +1143,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe0+dst.idx); @@ -1160,7 +1151,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) void x87_faddp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc0+dst.idx); @@ -1168,7 +1159,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) void x87_fdivp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf8+dst.idx); @@ -1176,7 +1167,7 @@ void x87_fdivp( struct x86_function *p, struct 
x86_reg dst ) void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf0+dst.idx); @@ -1184,83 +1175,83 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) void x87_fucom( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe0+arg.idx); } void x87_fucomp( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe8+arg.idx); } void x87_fucompp( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xda, 0xe9); } void x87_fxch( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xd9, 0xc8+arg.idx); } void x87_fabs( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xe1); } void x87_fchs( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xe0); } void x87_fcos( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xff); } void x87_fprndint( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfc); } void x87_fscale( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfd); } void x87_fsin( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfe); } void x87_fsincos( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfb); } void x87_fsqrt( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfa); } void x87_fxtract( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xf4); } @@ -1270,7 +1261,7 @@ void x87_fxtract( struct x86_function *p ) */ void x87_f2xm1( struct x86_function *p ) { - DUMP( 
__FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xf0); } @@ -1279,7 +1270,7 @@ void x87_f2xm1( struct x86_function *p ) */ void x87_fyl2x( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xf1); } @@ -1290,14 +1281,14 @@ void x87_fyl2x( struct x86_function *p ) */ void x87_fyl2xp1( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xf9); } void x87_fld( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); if (arg.file == file_x87) emit_2ub(p, 0xd9, 0xc0 + arg.idx); else { @@ -1308,7 +1299,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) void x87_fst( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd0 + dst.idx); else { @@ -1319,7 +1310,7 @@ void x87_fst( struct x86_function *p, struct x86_reg dst ) void x87_fstp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd8 + dst.idx); else { @@ -1330,7 +1321,7 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) void x87_fcom( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd0 + dst.idx); else { @@ -1341,7 +1332,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) void x87_fcomp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd8 + dst.idx); else { @@ -1353,7 +1344,7 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_REG32); if (dst.idx == reg_AX && @@ -1383,7 +1374,7 @@ void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, 
dst, src ); + DUMP_RR( dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1397,7 +1388,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1411,7 +1402,7 @@ void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src ); @@ -1421,7 +1412,7 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6f, 0x7f, dst, src ); @@ -1449,6 +1440,7 @@ void x86_init_func( struct x86_function *p ) p->size = 0; p->store = NULL; p->csr = p->store; + DUMP_START(); } void x86_init_func_size( struct x86_function *p, unsigned code_size ) @@ -1459,6 +1451,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size ) p->store = p->error_overflow; } p->csr = p->store; + DUMP_START(); } void x86_release_func( struct x86_function *p ) @@ -1474,6 +1467,7 @@ void x86_release_func( struct x86_function *p ) void (*x86_get_func( struct x86_function *p ))(void) { + DUMP_END(); if (DISASSEM && p->store) debug_printf("disassemble %p %p\n", p->store, p->csr); -- cgit v1.2.3 From a945420ae6f96f0d7024f97e37ffd31329865a84 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 19:48:21 +0100 Subject: rtasm: debug some missing funcs --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 10796c540d..3cd45d7dd9 100644 --- 
a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -364,6 +364,7 @@ void x86_jcc( struct x86_function *p, unsigned char *label ) { intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2); + DUMP_I(cc); if (offset <= 127 && offset >= -128) { emit_1ub(p, 0x70 + cc); @@ -381,6 +382,7 @@ void x86_jcc( struct x86_function *p, unsigned char *x86_jcc_forward( struct x86_function *p, enum x86_cc cc ) { + DUMP_I(cc); emit_2ub(p, 0x0f, 0x80 + cc); emit_1i(p, 0); return x86_get_label(p); @@ -1365,6 +1367,7 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) void mmx_emms( struct x86_function *p ) { + DUMP(); assert(p->need_emms); emit_2ub(p, 0x0f, 0x77); p->need_emms = 0; -- cgit v1.2.3 From e3c415995706d2dda7c34a227e2e24d0745763ec Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 27 Apr 2008 21:09:45 +0900 Subject: rtasm: Implement x86_retw. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 6 ++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 + 2 files changed, 7 insertions(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 3cd45d7dd9..e6cbe9967f 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -498,6 +498,12 @@ void x86_ret( struct x86_function *p ) emit_1ub(p, 0xc3); } +void x86_retw( struct x86_function *p, unsigned short imm ) +{ + DUMP(); + emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); +} + void x86_sahf( struct x86_function *p ) { DUMP(); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 695a1cef4e..1962b07bc5 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -219,6 +219,7 @@ void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_pop( struct 
x86_function *p, struct x86_reg reg ); void x86_push( struct x86_function *p, struct x86_reg reg ); void x86_ret( struct x86_function *p ); +void x86_retw( struct x86_function *p, unsigned short imm ); void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From 58d3dff0d3115ddd5397b7f77b5bcf4f9ca616b6 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 28 Apr 2008 18:50:27 +0200 Subject: gallium: Generate SSE code to swizzle and unswizzle vs inputs and outputs. Change SSE_SWIZZLES #define to 0 to disable it. --- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 52 ++++++-- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 14 ++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 + src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 142 ++++++++++++++++++++- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 4 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- 7 files changed, 204 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index f0763dad8d..4ec20493c4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -109,9 +109,10 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vertex_shader; unsigned opt = fpme->opt; + unsigned alloc_count = align_int( fetch_count, 4 ); struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count); + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); if (!pipeline_verts) { /* Not much we can do here - 
just skip the rendering. diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index b1e9f67114..07f85bc448 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -47,14 +47,29 @@ #include "tgsi/util/tgsi_parse.h" #define SSE_MAX_VERTICES 4 +#define SSE_SWIZZLES 1 +#if SSE_SWIZZLES +typedef void (XSTDCALL *codegen_function) ( + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], + struct tgsi_exec_vector *temporary, + float (*immediates)[4], + const float (*aos_input)[4], + uint num_inputs, + uint input_stride, + float (*aos_output)[4], + uint num_outputs, + uint output_stride ); +#else typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, float (*constant)[4], struct tgsi_exec_vector *temporary, float (*immediates)[4] ); - +#endif struct draw_sse_vertex_shader { struct draw_vertex_shader base; @@ -91,12 +106,31 @@ vs_sse_run_linear( struct draw_vertex_shader *base, { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = shader->machine; - unsigned int i, j; - unsigned slot; + unsigned int i; for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); +#if SSE_SWIZZLES + /* run compiled shader + */ + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + shader->immediates, + input, + base->info.num_inputs, + input_stride, + output, + base->info.num_outputs, + output_stride ); + + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); +#else + unsigned int j, slot; + /* Swizzle inputs. 
*/ for (j = 0; j < max_vertices; j++) { @@ -105,10 +139,10 @@ vs_sse_run_linear( struct draw_vertex_shader *base, machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; - } + } input = (const float (*)[4])((const char *)input + input_stride); - } + } /* run compiled shader */ @@ -118,7 +152,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base, machine->Temps, shader->immediates); - /* Unswizzle all output results. */ for (j = 0; j < max_vertices; j++) { @@ -127,10 +160,11 @@ vs_sse_run_linear( struct draw_vertex_shader *base, output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } + } output = (float (*)[4])((char *)output + output_stride); - } + } +#endif } } @@ -176,7 +210,7 @@ draw_create_vs_sse(struct draw_context *draw, x86_init_func( &vs->sse2_program ); if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, - &vs->sse2_program, vs->immediates )) + &vs->sse2_program, vs->immediates, SSE_SWIZZLES )) goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index e6cbe9967f..d7e2230557 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -853,6 +853,20 @@ void sse_shufps( struct x86_function *p, emit_1ub(p, shuf); } +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x15 ); + emit_modrm( p, dst, src ); +} + +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x14 ); + emit_modrm( p, dst, src ); +} + void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, diff --git 
a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 1962b07bc5..ad79b1facf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -203,6 +203,8 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, unsigned char shuf ); +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 9061e00b63..86ca16c246 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -1788,7 +1788,6 @@ emit_instruction( break; case TGSI_OPCODE_RET: - case TGSI_OPCODE_END: #ifdef WIN32 emit_retw( func, 16 ); #else @@ -1796,6 +1795,9 @@ emit_instruction( #endif break; + case TGSI_OPCODE_END: + break; + case TGSI_OPCODE_SSG: return 0; break; @@ -2027,6 +2029,127 @@ emit_declaration( } } +static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_input; + struct x86_reg aos_input; + struct x86_reg num_inputs; + struct x86_reg temp; + unsigned char *inner_loop; + + soa_input = x86_make_reg( file_REG32, reg_AX ); + aos_input = x86_make_reg( file_REG32, reg_BX ); + num_inputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, soa_input, get_argument( soa + 1 ) ); + x86_mov( func, 
aos_input, get_argument( aos + 1 ) ); + x86_mov( func, num_inputs, get_argument( num + 1 ) ); + + inner_loop = x86_get_label( func ); + + x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_mov_reg_imm( func, temp, 16 ); + x86_add( func, aos_input, temp ); + x86_mov_reg_imm( func, temp, 64 ); + x86_add( func, soa_input, temp ); + x86_dec( func, num_inputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); +} + +static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_output; + struct x86_reg 
aos_output; + struct x86_reg num_outputs; + struct x86_reg temp; + unsigned char *inner_loop; + + soa_output = x86_make_reg( file_REG32, reg_AX ); + aos_output = x86_make_reg( file_REG32, reg_BX ); + num_outputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, soa_output, get_argument( soa + 1 ) ); + x86_mov( func, aos_output, get_argument( aos + 1 ) ); + x86_mov( func, num_outputs, get_argument( num + 1 ) ); + + inner_loop = x86_get_label( func ); + + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + 
x86_mov_reg_imm( func, temp, 16 ); + x86_add( func, aos_output, temp ); + x86_mov_reg_imm( func, temp, 64 ); + x86_add( func, soa_output, temp ); + x86_dec( func, num_outputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); +} /** * Translate a TGSI vertex/fragment shader to SSE2 code. @@ -2048,7 +2171,8 @@ unsigned tgsi_emit_sse2( const struct tgsi_token *tokens, struct x86_function *func, - float (*immediates)[4]) + float (*immediates)[4], + boolean do_swizzles ) { struct tgsi_parse_context parse; boolean instruction_phase = FALSE; @@ -2089,6 +2213,9 @@ tgsi_emit_sse2( else { assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + if (do_swizzles) + aos_to_soa( func, 5, 0, 6, 7 ); + x86_mov( func, get_input_base(), @@ -2176,6 +2303,17 @@ tgsi_emit_sse2( } } + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + if (do_swizzles) + soa_to_aos( func, 8, 1, 9, 10 ); + } + +#ifdef WIN32 + emit_retw( func, 16 ); +#else + emit_ret( func ); +#endif + tgsi_parse_free( &parse ); return ok; diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index 063287dc5e..e66d115283 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -12,8 +12,8 @@ unsigned tgsi_emit_sse2( const struct tgsi_token *tokens, struct x86_function *function, - float (*immediates)[4] - ); + float (*immediates)[4], + boolean do_swizzles ); #if defined __cplusplus } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index f857d26143..4d569e1e22 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -133,7 +133,7 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, x86_init_func( &shader->sse2_program ); if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program, - shader->immediates)) { + 
shader->immediates, FALSE )) { FREE(shader); return NULL; } -- cgit v1.2.3 From 727257f32002544658219d2e0163993c1cbc5644 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 15:31:17 +0100 Subject: rtasm: assert stack is fully popped in return --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index d7e2230557..40f6f973d6 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -495,6 +495,7 @@ void x86_dec( struct x86_function *p, void x86_ret( struct x86_function *p ) { DUMP(); + assert(p->stack_offset == 0); emit_1ub(p, 0xc3); } -- cgit v1.2.3 From fb3623b235f5caa9d76e656b1e5eda797c7c73eb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 20:41:03 +0100 Subject: rtasm: fix labels after (not so) recent change to allow dynamic fn growth Using char * for labels doesn't work if you realloc the function during assembly and free the old storage... 
--- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 49 +++++++++---------------- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 14 +++---- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 4 +- src/gallium/auxiliary/translate/translate_sse.c | 2 +- 4 files changed, 28 insertions(+), 41 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 40f6f973d6..e69251f072 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -347,9 +347,9 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg ) return x86_make_reg( reg.file, reg.idx ); } -unsigned char *x86_get_label( struct x86_function *p ) +int x86_get_label( struct x86_function *p ) { - return p->csr; + return p->csr - p->store; } @@ -361,17 +361,22 @@ unsigned char *x86_get_label( struct x86_function *p ) void x86_jcc( struct x86_function *p, enum x86_cc cc, - unsigned char *label ) + int label ) { - intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2); + int offset = label - (x86_get_label(p) + 2); DUMP_I(cc); + if (offset < 0) { + int amt = p->csr - p->store; + assert(amt > -offset); + } + if (offset <= 127 && offset >= -128) { emit_1ub(p, 0x70 + cc); emit_1b(p, (char) offset); } else { - offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6); + offset = label - (x86_get_label(p) + 6); emit_2ub(p, 0x0f, 0x80 + cc); emit_1i(p, offset); } @@ -379,8 +384,8 @@ void x86_jcc( struct x86_function *p, /* Always use a 32bit offset for forward jumps: */ -unsigned char *x86_jcc_forward( struct x86_function *p, - enum x86_cc cc ) +int x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) { DUMP_I(cc); emit_2ub(p, 0x0f, 0x80 + cc); @@ -388,7 +393,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p, return x86_get_label(p); } -unsigned char *x86_jmp_forward( struct x86_function *p) +int 
x86_jmp_forward( struct x86_function *p) { DUMP(); emit_1ub(p, 0xe9); @@ -396,7 +401,7 @@ unsigned char *x86_jmp_forward( struct x86_function *p) return x86_get_label(p); } -unsigned char *x86_call_forward( struct x86_function *p) +int x86_call_forward( struct x86_function *p) { DUMP(); @@ -408,42 +413,24 @@ unsigned char *x86_call_forward( struct x86_function *p) /* Fixup offset from forward jump: */ void x86_fixup_fwd_jump( struct x86_function *p, - unsigned char *fixup ) + int fixup ) { - *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup ); + *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; } -void x86_jmp( struct x86_function *p, unsigned char *label) +void x86_jmp( struct x86_function *p, int label) { DUMP_I( label ); emit_1ub(p, 0xe9); - emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); -} - -#if 0 -static unsigned char *cptr( void (*label)() ) -{ - return (unsigned char *) label; + emit_1i(p, label - x86_get_label(p) - 4); } -/* This doesn't work once we start reallocating & copying the - * generated code on buffer fills, because the call is relative to the - * current pc. 
- */ -void x86_call( struct x86_function *p, void (*label)()) -{ - DUMP_I( label ); - emit_1ub(p, 0xe8); - emit_1i(p, cptr(label) - x86_get_label(p) - 4); -} -#else void x86_call( struct x86_function *p, struct x86_reg reg) { DUMP_R( reg ); emit_1ub(p, 0xff); emit_modrm_noreg(p, 2, reg); } -#endif /* michal: diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index ad79b1facf..eacaeeaf6f 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -124,23 +124,23 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg ); /* Labels, jumps and fixup: */ -unsigned char *x86_get_label( struct x86_function *p ); +int x86_get_label( struct x86_function *p ); void x86_jcc( struct x86_function *p, enum x86_cc cc, - unsigned char *label ); + int label ); -unsigned char *x86_jcc_forward( struct x86_function *p, +int x86_jcc_forward( struct x86_function *p, enum x86_cc cc ); -unsigned char *x86_jmp_forward( struct x86_function *p); +int x86_jmp_forward( struct x86_function *p); -unsigned char *x86_call_forward( struct x86_function *p); +int x86_call_forward( struct x86_function *p); void x86_fixup_fwd_jump( struct x86_function *p, - unsigned char *fixup ); + int fixup ); -void x86_jmp( struct x86_function *p, unsigned char *label ); +void x86_jmp( struct x86_function *p, int label ); /* void x86_call( struct x86_function *p, void (*label)() ); */ void x86_call( struct x86_function *p, struct x86_reg reg); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 45453c34ce..07db3292b4 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2021,7 +2021,7 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, struct x86_reg aos_input; struct x86_reg num_inputs; struct x86_reg temp; - unsigned char *inner_loop; + int inner_loop; soa_input = 
x86_make_reg( file_REG32, reg_AX ); aos_input = get_temp_base(); /* BX or SI */ @@ -2083,7 +2083,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, struct x86_reg aos_output; struct x86_reg num_outputs; struct x86_reg temp; - unsigned char *inner_loop; + int inner_loop; soa_output = x86_make_reg( file_REG32, reg_AX ); aos_output = get_temp_base(); /* BX or SI */ diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index f590d48b78..a54ac5a82f 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -404,7 +404,7 @@ static boolean build_vertex_emit( struct translate_sse *p, struct x86_reg srcEAX = x86_make_reg(file_REG32, reg_CX); struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); - uint8_t *fixup, *label; + int fixup, label; unsigned j; p->func = func; -- cgit v1.2.3 From 2c89b75e36fd35d5a003107d1d2f97b537321f95 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 20:44:41 +0100 Subject: rtasm: learn another version of push --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index e69251f072..4e036d9032 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -449,8 +449,15 @@ void x86_push( struct x86_function *p, struct x86_reg reg ) { DUMP_R( reg ); - assert(reg.mod == mod_REG); - emit_1ub(p, 0x50 + reg.idx); + if (reg.mod == mod_REG) + emit_1ub(p, 0x50 + reg.idx); + else + { + emit_1ub(p, 0xff); + emit_modrm_noreg(p, 6, reg); + } + + p->stack_offset += 4; } -- cgit v1.2.3 From d3e64caef6f8654af1a84825803e517ab8221c68 Mon Sep 17 00:00:00 2001 From: Keith 
Whitwell Date: Wed, 21 May 2008 08:28:16 +0100 Subject: rtasm: export debug reg print function --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 20 +++++++++----------- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 3 +++ 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 4e036d9032..68ac91ed13 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -36,11 +36,8 @@ #define DUMP_SSE 0 -#if DUMP_SSE -static void -_print_reg( - struct x86_reg reg ) +void x86_print_reg( struct x86_reg reg ) { if (reg.mod != mod_REG) debug_printf( "[" ); @@ -77,6 +74,7 @@ _print_reg( debug_printf( "]" ); } +#if DUMP_SSE #define DUMP_START() debug_printf( "\n" ) #define DUMP_END() debug_printf( "\n" ) @@ -87,7 +85,7 @@ _print_reg( foo++; \ if (*foo) \ foo++; \ - debug_printf( "\n% 15s ", foo ); \ + debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \ } while (0) #define DUMP_I( I ) do { \ @@ -97,27 +95,27 @@ _print_reg( #define DUMP_R( R0 ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ } while( 0 ) #define DUMP_RR( R0, R1 ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ debug_printf( ", " ); \ - _print_reg( R1 ); \ + x86_print_reg( R1 ); \ } while( 0 ) #define DUMP_RI( R0, I ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ debug_printf( ", %u", I ); \ } while( 0 ) #define DUMP_RRI( R0, R1, I ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ debug_printf( ", " ); \ - _print_reg( R1 ); \ + x86_print_reg( R1 ); \ debug_printf( ", %u", I ); \ } while( 0 ) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index baa10b7d4a..1e02c6e73b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -106,6 +106,9 @@ void 
x86_init_func_size( struct x86_function *p, unsigned code_size ); void x86_release_func( struct x86_function *p ); void (*x86_get_func( struct x86_function *p ))( void ); +/* Debugging: + */ +void x86_print_reg( struct x86_reg reg ); /* Create and manipulate registers and regmem values: -- cgit v1.2.3 From 030af06691bc5bc82ca141a576da7a2edffe9d1c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:14:55 +0100 Subject: rtasm: add x87 instructions and debug-check for x87 stack usage --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 120 +++++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 20 ++++- 2 files changed, 138 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 68ac91ed13..a2e8af343b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -986,6 +986,26 @@ void sse2_movd( struct x86_function *p, /*********************************************************************** * x87 instructions */ +static void note_x87_pop( struct x86_function *p ) +{ + p->x87_stack--; + assert(p->x87_stack >= 0); + debug_printf("\nstack: %d\n", p->x87_stack); +} + +static void note_x87_push( struct x86_function *p ) +{ + p->x87_stack++; + assert(p->x87_stack <= 7); + debug_printf("\nstack: %d\n", p->x87_stack); +} + +void x87_assert_stack_empty( struct x86_function *p ) +{ + assert (p->x87_stack == 0); +} + + void x87_fist( struct x86_function *p, struct x86_reg dst ) { DUMP_R( dst ); @@ -998,6 +1018,7 @@ void x87_fistp( struct x86_function *p, struct x86_reg dst ) DUMP_R( dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 3, dst); + note_x87_pop(p); } void x87_fild( struct x86_function *p, struct x86_reg arg ) @@ -1005,12 +1026,14 @@ void x87_fild( struct x86_function *p, struct x86_reg arg ) DUMP_R( arg ); emit_1ub(p, 0xdf); emit_modrm_noreg(p, 0, arg); + 
note_x87_push(p); } void x87_fldz( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xee); + note_x87_push(p); } @@ -1027,18 +1050,21 @@ void x87_fld1( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xe8); + note_x87_push(p); } void x87_fldl2e( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xea); + note_x87_push(p); } void x87_fldln2( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xed); + note_x87_push(p); } void x87_fwait( struct x86_function *p ) @@ -1059,6 +1085,49 @@ void x87_fclex( struct x86_function *p ) x87_fnclex(p); } +void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc0+arg.idx); +} + +void x87_fcmove( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc8+arg.idx); +} + +void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xd0+arg.idx); +} + +void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xc0+arg.idx); +} + +void x87_fcmovne( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xc8+arg.idx); +} + +void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xd0+arg.idx); +} + + static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, unsigned char dst0ub0, @@ -1146,6 +1215,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc8+dst.idx); + note_x87_pop(p); } void x87_fsubp( struct x86_function *p, struct x86_reg dst ) @@ -1154,6 +1224,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); 
emit_2ub(p, 0xde, 0xe8+dst.idx); + note_x87_pop(p); } void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) @@ -1162,6 +1233,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe0+dst.idx); + note_x87_pop(p); } void x87_faddp( struct x86_function *p, struct x86_reg dst ) @@ -1170,6 +1242,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc0+dst.idx); + note_x87_pop(p); } void x87_fdivp( struct x86_function *p, struct x86_reg dst ) @@ -1178,6 +1251,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf8+dst.idx); + note_x87_pop(p); } void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) @@ -1186,6 +1260,13 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf0+dst.idx); + note_x87_pop(p); +} + +void x87_ftst( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xe4); } void x87_fucom( struct x86_function *p, struct x86_reg arg ) @@ -1200,12 +1281,15 @@ void x87_fucomp( struct x86_function *p, struct x86_reg arg ) DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe8+arg.idx); + note_x87_pop(p); } void x87_fucompp( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xda, 0xe9); + note_x87_pop(p); /* pop twice */ + note_x87_pop(p); /* pop twice */ } void x87_fxch( struct x86_function *p, struct x86_reg arg ) @@ -1287,6 +1371,7 @@ void x87_fyl2x( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xf1); + note_x87_pop(p); } /* st1 = st1 * log2(st0 + 1.0); @@ -1298,6 +1383,7 @@ void x87_fyl2xp1( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xf9); + note_x87_pop(p); } @@ -1310,6 +1396,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) emit_1ub(p, 0xd9); emit_modrm_noreg(p, 0, 
arg); } + note_x87_push(p); } void x87_fst( struct x86_function *p, struct x86_reg dst ) @@ -1332,8 +1419,15 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) emit_1ub(p, 0xd9); emit_modrm_noreg(p, 3, dst); } + note_x87_pop(p); +} + +void x87_fpop( struct x86_function *p ) +{ + x87_fstp( p, x86_make_reg( file_x87, 0 )); } + void x87_fcom( struct x86_function *p, struct x86_reg dst ) { DUMP_R( dst ); @@ -1345,6 +1439,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) } } + void x87_fcomp( struct x86_function *p, struct x86_reg dst ) { DUMP_R( dst ); @@ -1354,6 +1449,20 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) emit_1ub(p, 0xd8); emit_modrm_noreg(p, 3, dst); } + note_x87_pop(p); +} + +void x87_fcomi( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_2ub(p, 0xdb, 0xf0+arg.idx); +} + +void x87_fcomip( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_2ub(p, 0xdb, 0xf0+arg.idx); + note_x87_pop(p); } @@ -1372,6 +1481,17 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) } +void x87_fnstcw( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_REG32); + + emit_1ub(p, 0x9b); /* WAIT -- needed? 
*/ + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 7, dst); +} + + /*********************************************************************** diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 1e02c6e73b..9f7e31e055 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -41,8 +41,11 @@ struct x86_function { unsigned size; unsigned char *store; unsigned char *csr; - unsigned stack_offset; - int need_emms; + + unsigned stack_offset:16; + unsigned need_emms:8; + int x87_stack:8; + unsigned char error_overflow[4]; }; @@ -229,13 +232,23 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_sahf( struct x86_function *p ); +void x87_assert_stack_empty( struct x86_function *p ); + void x87_f2xm1( struct x86_function *p ); void x87_fabs( struct x86_function *p ); void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); void x87_faddp( struct x86_function *p, struct x86_reg dst ); void x87_fchs( struct x86_function *p ); void x87_fclex( struct x86_function *p ); +void x87_fcmovb( struct x86_function *p, struct x86_reg src ); +void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); +void x87_fcmove( struct x86_function *p, struct x86_reg src ); +void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); +void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); +void x87_fcmovne( struct x86_function *p, struct x86_reg src ); void x87_fcom( struct x86_function *p, struct x86_reg dst ); +void x87_fcomi( struct x86_function *p, struct x86_reg dst ); +void x87_fcomip( struct x86_function *p, struct x86_reg dst ); void x87_fcomp( struct x86_function *p, struct x86_reg dst ); void x87_fcos( struct x86_function *p ); void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); @@ -255,6 +268,7 @@ void 
x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); void x87_fmulp( struct x86_function *p, struct x86_reg dst ); void x87_fnclex( struct x86_function *p ); void x87_fprndint( struct x86_function *p ); +void x87_fpop( struct x86_function *p ); void x87_fscale( struct x86_function *p ); void x87_fsin( struct x86_function *p ); void x87_fsincos( struct x86_function *p ); @@ -265,11 +279,13 @@ void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); void x87_fsubp( struct x86_function *p, struct x86_reg dst ); void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); +void x87_ftst( struct x86_function *p ); void x87_fxch( struct x86_function *p, struct x86_reg dst ); void x87_fxtract( struct x86_function *p ); void x87_fyl2x( struct x86_function *p ); void x87_fyl2xp1( struct x86_function *p ); void x87_fwait( struct x86_function *p ); +void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); void x87_fucompp( struct x86_function *p ); void x87_fucomp( struct x86_function *p, struct x86_reg arg ); -- cgit v1.2.3 From 6f407b072453eb2bb7077a952257a099db4da025 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:50:36 +0100 Subject: rtasm: remove debug --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index a2e8af343b..d78676b8f3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -990,14 +990,12 @@ static void note_x87_pop( struct x86_function *p ) { p->x87_stack--; assert(p->x87_stack >= 0); - debug_printf("\nstack: %d\n", p->x87_stack); } static void note_x87_push( struct x86_function *p ) { 
p->x87_stack++; assert(p->x87_stack <= 7); - debug_printf("\nstack: %d\n", p->x87_stack); } void x87_assert_stack_empty( struct x86_function *p ) -- cgit v1.2.3 From 6b3723ee8d084a1abbc971b21c58f7c1e66949a7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 May 2008 13:22:15 +0100 Subject: rtasm: add some helpers for calling out from generated code --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 44 ++++++++++++++++++++++++++++-- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 6 ++++ 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index d78676b8f3..2415b0156b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -218,6 +218,8 @@ static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1 /* Build a modRM byte + possible displacement. No treatment of SIB * indexing. BZZT - no way to encode an absolute address. + * + * This is the "/r" field in the x86 manuals... */ static void emit_modrm( struct x86_function *p, struct x86_reg reg, @@ -256,7 +258,8 @@ static void emit_modrm( struct x86_function *p, } } - +/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. 
+ */ static void emit_modrm_noreg( struct x86_function *p, unsigned op, struct x86_reg regmem ) @@ -365,8 +368,7 @@ void x86_jcc( struct x86_function *p, DUMP_I(cc); if (offset < 0) { - int amt = p->csr - p->store; - assert(amt > -offset); + assert(p->csr - p->store > -offset); } if (offset <= 127 && offset >= -128) { @@ -443,6 +445,16 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) emit_1i(p, imm); } +void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ) +{ + DUMP_RI( dst, imm ); + assert(dst.mod == mod_REG); + emit_1ub(p, 0x80); + emit_modrm_noreg(p, 0, dst); + emit_1ub(p, imm); +} + + void x86_push( struct x86_function *p, struct x86_reg reg ) { @@ -459,6 +471,17 @@ void x86_push( struct x86_function *p, p->stack_offset += 4; } +void x86_push_imm32( struct x86_function *p, + int imm32 ) +{ + DUMP_I( imm32 ); + emit_1ub(p, 0x68); + emit_1i(p, imm32); + + p->stack_offset += 4; +} + + void x86_pop( struct x86_function *p, struct x86_reg reg ) { @@ -1558,6 +1581,21 @@ void mmx_movq( struct x86_function *p, */ +void x86_cdecl_caller_push_regs( struct x86_function *p ) +{ + x86_push(p, x86_make_reg(file_REG32, reg_AX)); + x86_push(p, x86_make_reg(file_REG32, reg_CX)); + x86_push(p, x86_make_reg(file_REG32, reg_DX)); +} + +void x86_cdecl_caller_pop_regs( struct x86_function *p ) +{ + x86_pop(p, x86_make_reg(file_REG32, reg_DX)); + x86_pop(p, x86_make_reg(file_REG32, reg_CX)); + x86_pop(p, x86_make_reg(file_REG32, reg_AX)); +} + + /* Retrieve a reference to one of the function arguments, taking into * account any push/pop activity: */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 9f7e31e055..63e812fac9 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -155,6 +155,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg); * I load the immediate into general purpose register and use it. 
*/ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ); /* Macro for sse_shufps() and sse2_pshufd(): @@ -225,6 +226,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_pop( struct x86_function *p, struct x86_reg reg ); void x86_push( struct x86_function *p, struct x86_reg reg ); +void x86_push_imm32( struct x86_function *p, int imm ); void x86_ret( struct x86_function *p ); void x86_retw( struct x86_function *p, unsigned short imm ); void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); @@ -232,6 +234,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_sahf( struct x86_function *p ); + +void x86_cdecl_caller_push_regs( struct x86_function *p ); +void x86_cdecl_caller_pop_regs( struct x86_function *p ); + void x87_assert_stack_empty( struct x86_function *p ); void x87_f2xm1( struct x86_function *p ); -- cgit v1.2.3 From 55d29a8d48663982a1aeea414f69a5896b97d1ea Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 May 2008 16:12:14 +0900 Subject: gallium: Windows CE portability fixes. 
--- src/gallium/auxiliary/draw/draw_pt_elts.c | 8 +- src/gallium/auxiliary/draw/draw_pt_varray.c | 4 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 4 +- src/gallium/auxiliary/rtasm/rtasm_cpu.c | 4 +- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 4 +- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 4 +- src/gallium/auxiliary/tgsi/util/tgsi_util.c | 2 +- src/gallium/auxiliary/translate/translate.c | 3 +- .../auxiliary/translate/translate_generic.c | 236 ++++++++++----------- src/gallium/auxiliary/translate/translate_sse.c | 5 +- src/gallium/auxiliary/util/u_time.h | 2 +- src/gallium/include/pipe/p_compiler.h | 54 +++-- src/gallium/include/pipe/p_config.h | 8 +- src/gallium/include/pipe/p_debug.h | 11 +- 14 files changed, 192 insertions(+), 157 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c index 2094c081ed..b7780fb507 100644 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -60,10 +60,10 @@ static unsigned elt_vert( const void *elts, unsigned idx ) pt_elt_func draw_pt_elt_func( struct draw_context *draw ) { switch (draw->pt.user.eltSize) { - case 0: return elt_vert; - case 1: return elt_ubyte; - case 2: return elt_ushort; - case 4: return elt_uint; + case 0: return &elt_vert; + case 1: return &elt_ubyte; + case 2: return &elt_ushort; + case 4: return &elt_uint; default: return NULL; } } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 355093f945..c7c66b34d4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -147,8 +147,8 @@ static INLINE void varray_ef_quad( struct varray_frontend *varray, unsigned i2, unsigned i3 ) { - const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; - const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; + const ushort 
omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; + const ushort omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; varray_triangle_flags( varray, DRAW_PIPE_RESET_STIPPLE | omitEdge1, diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index e3f4e67472..c88bc137ee 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -31,9 +31,11 @@ * Brian Paul */ +#include "pipe/p_config.h" + #include "draw_vs.h" -#if defined(__i386__) || defined(__386__) +#if defined(PIPE_ARCH_X86) #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c index f01e12faa0..5499018b21 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -47,7 +47,7 @@ static boolean rtasm_sse_enabled(void) int rtasm_cpu_has_sse(void) { /* FIXME: actually detect this at run-time */ -#if defined(__i386__) || defined(__386__) || defined(i386) +#if defined(PIPE_ARCH_X86) return rtasm_sse_enabled(); #else return 0; @@ -57,7 +57,7 @@ int rtasm_cpu_has_sse(void) int rtasm_cpu_has_sse2(void) { /* FIXME: actually detect this at run-time */ -#if defined(__i386__) || defined(__386__) || defined(i386) +#if defined(PIPE_ARCH_X86) return rtasm_sse_enabled(); #else return 0; diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 4e036d9032..6cd88ebca3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -21,7 +21,9 @@ * **************************************************************************/ -#if defined(__i386__) || defined(__386__) || defined(i386) +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) #include "pipe/p_compiler.h" #include "pipe/p_debug.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 
eacaeeaf6f..a5afa16395 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -24,7 +24,9 @@ #ifndef _RTASM_X86SSE_H_ #define _RTASM_X86SSE_H_ -#if defined(__i386__) || defined(__386__) || defined(i386) +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) /* It is up to the caller to ensure that instructions issued are * suitable for the host cpu. There are no checks made in this module diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c index 4cdd89182a..56a50d3b21 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c @@ -8,7 +8,7 @@ union pointer_hack { void *pointer; - unsigned long long uint64; + uint64_t uint64; }; void * diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index b04bc6eefd..b93fbf9033 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -30,6 +30,7 @@ * Keith Whitwell */ +#include "pipe/p_config.h" #include "pipe/p_util.h" #include "pipe/p_state.h" #include "translate.h" @@ -38,7 +39,7 @@ struct translate *translate_create( const struct translate_key *key ) { struct translate *translate = NULL; -#if defined(__i386__) || defined(__386__) || defined(i386) +#if defined(PIPE_ARCH_X86) translate = translate_sse2_create( key ); if (translate) return translate; diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 402780ee53..8f3b470333 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -255,140 +255,140 @@ static fetch_func get_fetch_func( enum pipe_format format ) { switch (format) { case PIPE_FORMAT_R64_FLOAT: - return fetch_R64_FLOAT; + return &fetch_R64_FLOAT; case PIPE_FORMAT_R64G64_FLOAT: - return fetch_R64G64_FLOAT; + return 
&fetch_R64G64_FLOAT; case PIPE_FORMAT_R64G64B64_FLOAT: - return fetch_R64G64B64_FLOAT; + return &fetch_R64G64B64_FLOAT; case PIPE_FORMAT_R64G64B64A64_FLOAT: - return fetch_R64G64B64A64_FLOAT; + return &fetch_R64G64B64A64_FLOAT; case PIPE_FORMAT_R32_FLOAT: - return fetch_R32_FLOAT; + return &fetch_R32_FLOAT; case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; + return &fetch_R32G32_FLOAT; case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; + return &fetch_R32G32B32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; + return &fetch_R32G32B32A32_FLOAT; case PIPE_FORMAT_R32_UNORM: - return fetch_R32_UNORM; + return &fetch_R32_UNORM; case PIPE_FORMAT_R32G32_UNORM: - return fetch_R32G32_UNORM; + return &fetch_R32G32_UNORM; case PIPE_FORMAT_R32G32B32_UNORM: - return fetch_R32G32B32_UNORM; + return &fetch_R32G32B32_UNORM; case PIPE_FORMAT_R32G32B32A32_UNORM: - return fetch_R32G32B32A32_UNORM; + return &fetch_R32G32B32A32_UNORM; case PIPE_FORMAT_R32_USCALED: - return fetch_R32_USCALED; + return &fetch_R32_USCALED; case PIPE_FORMAT_R32G32_USCALED: - return fetch_R32G32_USCALED; + return &fetch_R32G32_USCALED; case PIPE_FORMAT_R32G32B32_USCALED: - return fetch_R32G32B32_USCALED; + return &fetch_R32G32B32_USCALED; case PIPE_FORMAT_R32G32B32A32_USCALED: - return fetch_R32G32B32A32_USCALED; + return &fetch_R32G32B32A32_USCALED; case PIPE_FORMAT_R32_SNORM: - return fetch_R32_SNORM; + return &fetch_R32_SNORM; case PIPE_FORMAT_R32G32_SNORM: - return fetch_R32G32_SNORM; + return &fetch_R32G32_SNORM; case PIPE_FORMAT_R32G32B32_SNORM: - return fetch_R32G32B32_SNORM; + return &fetch_R32G32B32_SNORM; case PIPE_FORMAT_R32G32B32A32_SNORM: - return fetch_R32G32B32A32_SNORM; + return &fetch_R32G32B32A32_SNORM; case PIPE_FORMAT_R32_SSCALED: - return fetch_R32_SSCALED; + return &fetch_R32_SSCALED; case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; + return &fetch_R32G32_SSCALED; case PIPE_FORMAT_R32G32B32_SSCALED: - return 
fetch_R32G32B32_SSCALED; + return &fetch_R32G32B32_SSCALED; case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; + return &fetch_R32G32B32A32_SSCALED; case PIPE_FORMAT_R16_UNORM: - return fetch_R16_UNORM; + return &fetch_R16_UNORM; case PIPE_FORMAT_R16G16_UNORM: - return fetch_R16G16_UNORM; + return &fetch_R16G16_UNORM; case PIPE_FORMAT_R16G16B16_UNORM: - return fetch_R16G16B16_UNORM; + return &fetch_R16G16B16_UNORM; case PIPE_FORMAT_R16G16B16A16_UNORM: - return fetch_R16G16B16A16_UNORM; + return &fetch_R16G16B16A16_UNORM; case PIPE_FORMAT_R16_USCALED: - return fetch_R16_USCALED; + return &fetch_R16_USCALED; case PIPE_FORMAT_R16G16_USCALED: - return fetch_R16G16_USCALED; + return &fetch_R16G16_USCALED; case PIPE_FORMAT_R16G16B16_USCALED: - return fetch_R16G16B16_USCALED; + return &fetch_R16G16B16_USCALED; case PIPE_FORMAT_R16G16B16A16_USCALED: - return fetch_R16G16B16A16_USCALED; + return &fetch_R16G16B16A16_USCALED; case PIPE_FORMAT_R16_SNORM: - return fetch_R16_SNORM; + return &fetch_R16_SNORM; case PIPE_FORMAT_R16G16_SNORM: - return fetch_R16G16_SNORM; + return &fetch_R16G16_SNORM; case PIPE_FORMAT_R16G16B16_SNORM: - return fetch_R16G16B16_SNORM; + return &fetch_R16G16B16_SNORM; case PIPE_FORMAT_R16G16B16A16_SNORM: - return fetch_R16G16B16A16_SNORM; + return &fetch_R16G16B16A16_SNORM; case PIPE_FORMAT_R16_SSCALED: - return fetch_R16_SSCALED; + return &fetch_R16_SSCALED; case PIPE_FORMAT_R16G16_SSCALED: - return fetch_R16G16_SSCALED; + return &fetch_R16G16_SSCALED; case PIPE_FORMAT_R16G16B16_SSCALED: - return fetch_R16G16B16_SSCALED; + return &fetch_R16G16B16_SSCALED; case PIPE_FORMAT_R16G16B16A16_SSCALED: - return fetch_R16G16B16A16_SSCALED; + return &fetch_R16G16B16A16_SSCALED; case PIPE_FORMAT_R8_UNORM: - return fetch_R8_UNORM; + return &fetch_R8_UNORM; case PIPE_FORMAT_R8G8_UNORM: - return fetch_R8G8_UNORM; + return &fetch_R8G8_UNORM; case PIPE_FORMAT_R8G8B8_UNORM: - return fetch_R8G8B8_UNORM; + return &fetch_R8G8B8_UNORM; case 
PIPE_FORMAT_R8G8B8A8_UNORM: - return fetch_R8G8B8A8_UNORM; + return &fetch_R8G8B8A8_UNORM; case PIPE_FORMAT_R8_USCALED: - return fetch_R8_USCALED; + return &fetch_R8_USCALED; case PIPE_FORMAT_R8G8_USCALED: - return fetch_R8G8_USCALED; + return &fetch_R8G8_USCALED; case PIPE_FORMAT_R8G8B8_USCALED: - return fetch_R8G8B8_USCALED; + return &fetch_R8G8B8_USCALED; case PIPE_FORMAT_R8G8B8A8_USCALED: - return fetch_R8G8B8A8_USCALED; + return &fetch_R8G8B8A8_USCALED; case PIPE_FORMAT_R8_SNORM: - return fetch_R8_SNORM; + return &fetch_R8_SNORM; case PIPE_FORMAT_R8G8_SNORM: - return fetch_R8G8_SNORM; + return &fetch_R8G8_SNORM; case PIPE_FORMAT_R8G8B8_SNORM: - return fetch_R8G8B8_SNORM; + return &fetch_R8G8B8_SNORM; case PIPE_FORMAT_R8G8B8A8_SNORM: - return fetch_R8G8B8A8_SNORM; + return &fetch_R8G8B8A8_SNORM; case PIPE_FORMAT_R8_SSCALED: - return fetch_R8_SSCALED; + return &fetch_R8_SSCALED; case PIPE_FORMAT_R8G8_SSCALED: - return fetch_R8G8_SSCALED; + return &fetch_R8G8_SSCALED; case PIPE_FORMAT_R8G8B8_SSCALED: - return fetch_R8G8B8_SSCALED; + return &fetch_R8G8B8_SSCALED; case PIPE_FORMAT_R8G8B8A8_SSCALED: - return fetch_R8G8B8A8_SSCALED; + return &fetch_R8G8B8A8_SSCALED; case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; + return &fetch_A8R8G8B8_UNORM; case PIPE_FORMAT_B8G8R8A8_UNORM: - return fetch_B8G8R8A8_UNORM; + return &fetch_B8G8R8A8_UNORM; default: assert(0); - return fetch_NULL; + return &fetch_NULL; } } @@ -399,140 +399,140 @@ static emit_func get_emit_func( enum pipe_format format ) { switch (format) { case PIPE_FORMAT_R64_FLOAT: - return emit_R64_FLOAT; + return &emit_R64_FLOAT; case PIPE_FORMAT_R64G64_FLOAT: - return emit_R64G64_FLOAT; + return &emit_R64G64_FLOAT; case PIPE_FORMAT_R64G64B64_FLOAT: - return emit_R64G64B64_FLOAT; + return &emit_R64G64B64_FLOAT; case PIPE_FORMAT_R64G64B64A64_FLOAT: - return emit_R64G64B64A64_FLOAT; + return &emit_R64G64B64A64_FLOAT; case PIPE_FORMAT_R32_FLOAT: - return emit_R32_FLOAT; + return &emit_R32_FLOAT; case 
PIPE_FORMAT_R32G32_FLOAT: - return emit_R32G32_FLOAT; + return &emit_R32G32_FLOAT; case PIPE_FORMAT_R32G32B32_FLOAT: - return emit_R32G32B32_FLOAT; + return &emit_R32G32B32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: - return emit_R32G32B32A32_FLOAT; + return &emit_R32G32B32A32_FLOAT; case PIPE_FORMAT_R32_UNORM: - return emit_R32_UNORM; + return &emit_R32_UNORM; case PIPE_FORMAT_R32G32_UNORM: - return emit_R32G32_UNORM; + return &emit_R32G32_UNORM; case PIPE_FORMAT_R32G32B32_UNORM: - return emit_R32G32B32_UNORM; + return &emit_R32G32B32_UNORM; case PIPE_FORMAT_R32G32B32A32_UNORM: - return emit_R32G32B32A32_UNORM; + return &emit_R32G32B32A32_UNORM; case PIPE_FORMAT_R32_USCALED: - return emit_R32_USCALED; + return &emit_R32_USCALED; case PIPE_FORMAT_R32G32_USCALED: - return emit_R32G32_USCALED; + return &emit_R32G32_USCALED; case PIPE_FORMAT_R32G32B32_USCALED: - return emit_R32G32B32_USCALED; + return &emit_R32G32B32_USCALED; case PIPE_FORMAT_R32G32B32A32_USCALED: - return emit_R32G32B32A32_USCALED; + return &emit_R32G32B32A32_USCALED; case PIPE_FORMAT_R32_SNORM: - return emit_R32_SNORM; + return &emit_R32_SNORM; case PIPE_FORMAT_R32G32_SNORM: - return emit_R32G32_SNORM; + return &emit_R32G32_SNORM; case PIPE_FORMAT_R32G32B32_SNORM: - return emit_R32G32B32_SNORM; + return &emit_R32G32B32_SNORM; case PIPE_FORMAT_R32G32B32A32_SNORM: - return emit_R32G32B32A32_SNORM; + return &emit_R32G32B32A32_SNORM; case PIPE_FORMAT_R32_SSCALED: - return emit_R32_SSCALED; + return &emit_R32_SSCALED; case PIPE_FORMAT_R32G32_SSCALED: - return emit_R32G32_SSCALED; + return &emit_R32G32_SSCALED; case PIPE_FORMAT_R32G32B32_SSCALED: - return emit_R32G32B32_SSCALED; + return &emit_R32G32B32_SSCALED; case PIPE_FORMAT_R32G32B32A32_SSCALED: - return emit_R32G32B32A32_SSCALED; + return &emit_R32G32B32A32_SSCALED; case PIPE_FORMAT_R16_UNORM: - return emit_R16_UNORM; + return &emit_R16_UNORM; case PIPE_FORMAT_R16G16_UNORM: - return emit_R16G16_UNORM; + return &emit_R16G16_UNORM; case 
PIPE_FORMAT_R16G16B16_UNORM: - return emit_R16G16B16_UNORM; + return &emit_R16G16B16_UNORM; case PIPE_FORMAT_R16G16B16A16_UNORM: - return emit_R16G16B16A16_UNORM; + return &emit_R16G16B16A16_UNORM; case PIPE_FORMAT_R16_USCALED: - return emit_R16_USCALED; + return &emit_R16_USCALED; case PIPE_FORMAT_R16G16_USCALED: - return emit_R16G16_USCALED; + return &emit_R16G16_USCALED; case PIPE_FORMAT_R16G16B16_USCALED: - return emit_R16G16B16_USCALED; + return &emit_R16G16B16_USCALED; case PIPE_FORMAT_R16G16B16A16_USCALED: - return emit_R16G16B16A16_USCALED; + return &emit_R16G16B16A16_USCALED; case PIPE_FORMAT_R16_SNORM: - return emit_R16_SNORM; + return &emit_R16_SNORM; case PIPE_FORMAT_R16G16_SNORM: - return emit_R16G16_SNORM; + return &emit_R16G16_SNORM; case PIPE_FORMAT_R16G16B16_SNORM: - return emit_R16G16B16_SNORM; + return &emit_R16G16B16_SNORM; case PIPE_FORMAT_R16G16B16A16_SNORM: - return emit_R16G16B16A16_SNORM; + return &emit_R16G16B16A16_SNORM; case PIPE_FORMAT_R16_SSCALED: - return emit_R16_SSCALED; + return &emit_R16_SSCALED; case PIPE_FORMAT_R16G16_SSCALED: - return emit_R16G16_SSCALED; + return &emit_R16G16_SSCALED; case PIPE_FORMAT_R16G16B16_SSCALED: - return emit_R16G16B16_SSCALED; + return &emit_R16G16B16_SSCALED; case PIPE_FORMAT_R16G16B16A16_SSCALED: - return emit_R16G16B16A16_SSCALED; + return &emit_R16G16B16A16_SSCALED; case PIPE_FORMAT_R8_UNORM: - return emit_R8_UNORM; + return &emit_R8_UNORM; case PIPE_FORMAT_R8G8_UNORM: - return emit_R8G8_UNORM; + return &emit_R8G8_UNORM; case PIPE_FORMAT_R8G8B8_UNORM: - return emit_R8G8B8_UNORM; + return &emit_R8G8B8_UNORM; case PIPE_FORMAT_R8G8B8A8_UNORM: - return emit_R8G8B8A8_UNORM; + return &emit_R8G8B8A8_UNORM; case PIPE_FORMAT_R8_USCALED: - return emit_R8_USCALED; + return &emit_R8_USCALED; case PIPE_FORMAT_R8G8_USCALED: - return emit_R8G8_USCALED; + return &emit_R8G8_USCALED; case PIPE_FORMAT_R8G8B8_USCALED: - return emit_R8G8B8_USCALED; + return &emit_R8G8B8_USCALED; case PIPE_FORMAT_R8G8B8A8_USCALED: - 
return emit_R8G8B8A8_USCALED; + return &emit_R8G8B8A8_USCALED; case PIPE_FORMAT_R8_SNORM: - return emit_R8_SNORM; + return &emit_R8_SNORM; case PIPE_FORMAT_R8G8_SNORM: - return emit_R8G8_SNORM; + return &emit_R8G8_SNORM; case PIPE_FORMAT_R8G8B8_SNORM: - return emit_R8G8B8_SNORM; + return &emit_R8G8B8_SNORM; case PIPE_FORMAT_R8G8B8A8_SNORM: - return emit_R8G8B8A8_SNORM; + return &emit_R8G8B8A8_SNORM; case PIPE_FORMAT_R8_SSCALED: - return emit_R8_SSCALED; + return &emit_R8_SSCALED; case PIPE_FORMAT_R8G8_SSCALED: - return emit_R8G8_SSCALED; + return &emit_R8G8_SSCALED; case PIPE_FORMAT_R8G8B8_SSCALED: - return emit_R8G8B8_SSCALED; + return &emit_R8G8B8_SSCALED; case PIPE_FORMAT_R8G8B8A8_SSCALED: - return emit_R8G8B8A8_SSCALED; + return &emit_R8G8B8A8_SSCALED; case PIPE_FORMAT_A8R8G8B8_UNORM: - return emit_A8R8G8B8_UNORM; + return &emit_A8R8G8B8_UNORM; case PIPE_FORMAT_B8G8R8A8_UNORM: - return emit_B8G8R8A8_UNORM; + return &emit_B8G8R8A8_UNORM; default: assert(0); - return emit_NULL; + return &emit_NULL; } } diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index a54ac5a82f..634b05b8a9 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -26,6 +26,7 @@ */ +#include "pipe/p_config.h" #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "util/u_simple_list.h" @@ -33,7 +34,7 @@ #include "translate.h" -#if defined(__i386__) || defined(__386__) || defined(i386) +#if defined(PIPE_ARCH_X86) #include "rtasm/rtasm_cpu.h" #include "rtasm/rtasm_x86sse.h" @@ -617,7 +618,7 @@ struct translate *translate_sse2_create( const struct translate_key *key ) #else -void translate_create_sse( const struct translate_key *key ) +struct translate *translate_sse2_create( const struct translate_key *key ) { return NULL; } diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index 48ec7a4a96..f9963ce0e2 100644 --- 
a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -61,7 +61,7 @@ struct util_time #if defined(PIPE_OS_LINUX) struct timeval tv; #else - long long counter; + int64_t counter; #endif }; diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index a4b772bc4f..96b21d998d 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -52,39 +52,55 @@ #endif /* __MSC__ */ -typedef unsigned int uint; -typedef unsigned char ubyte; -typedef unsigned char boolean; -typedef unsigned short ushort; -typedef unsigned long long uint64; - - #if defined(__MSC__) -typedef char int8_t; -typedef unsigned char uint8_t; -typedef short int16_t; -typedef unsigned short uint16_t; -typedef long int32_t; -typedef unsigned long uint32_t; -typedef long long int64_t; -typedef unsigned long long uint64_t; +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; #if defined(_WIN64) typedef __int64 intptr_t; typedef unsigned __int64 uintptr_t; #else -typedef int intptr_t; -typedef unsigned int uintptr_t; +typedef __int32 intptr_t; +typedef unsigned __int32 uintptr_t; #endif +#ifndef __cplusplus +#define false 0 +#define true 1 +#define bool _Bool +typedef int _Bool; +#define __bool_true_false_are_defined 1 +#endif /* !__cplusplus */ + #else #include +#include #endif -#define TRUE 1 -#define FALSE 0 +typedef unsigned int uint; +typedef unsigned char ubyte; +typedef unsigned short ushort; +typedef uint64_t uint64; + +#if 0 +#define boolean bool +#else +typedef unsigned char boolean; +#endif +#ifndef TRUE +#define TRUE true +#endif +#ifndef FALSE +#define FALSE false +#endif /* Function inlining */ diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 6ba211a1fc..d2d2ae1617 
100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -35,6 +35,10 @@ * this file is auto-generated by an autoconf-like tool at some point, as some * things cannot be determined by existing defines alone. * + * See also: + * - http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + * - echo | gcc -dM -E - | sort + * - http://msdn.microsoft.com/en-us/library/b0084kay.aspx * @author José Fonseca */ @@ -63,11 +67,11 @@ * Processor architecture */ -#if defined(_X86_) || defined(__i386__) || defined(__386__) || defined(i386) +#if defined(__i386__) /* gcc */ || defined(_M_IX86) /* msvc */ || defined(_X86_) || defined(__386__) || defined(i386) #define PIPE_ARCH_X86 #endif -#if 0 /* FIXME */ +#if defined(__x86_64__) /* gcc */ || defined(_M_X64) /* msvc */ || defined(_M_AMD64) /* msvc */ #define PIPE_ARCH_X86_64 #endif diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 0af635be57..05eca75201 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -59,6 +59,13 @@ extern "C" { #endif #endif + +/* MSVC before VC7 does not have the __FUNCTION__ macro */ +#if defined(_MSC_VER) && _MSC_VER < 1300 +#define __FUNCTION__ "???" 
+#endif + + void _debug_vprintf(const char *format, va_list ap); @@ -127,8 +134,8 @@ void _debug_break(void); #ifdef DEBUG #if (defined(__i386__) || defined(__386__)) && defined(__GNUC__) #define debug_break() __asm("int3") -#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) -#define debug_break() _asm {int 3} +#elif defined(_M_IX86) && defined(_MSC_VER) +#define debug_break() do { _asm {int 3} } while(0) #else #define debug_break() _debug_break() #endif -- cgit v1.2.3 From 648da5158e5f418bf859aee6aa4532b6899b0d94 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 28 May 2008 16:36:45 +0100 Subject: rtasm: special case for [ebp] --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 2415b0156b..672d2ff554 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -328,7 +328,7 @@ struct x86_reg x86_make_disp( struct x86_reg reg, else reg.disp += disp; - if (reg.disp == 0) + if (reg.disp == 0 && reg.idx != reg_BP) reg.mod = mod_INDIRECT; else if (reg.disp <= 127 && reg.disp >= -128) reg.mod = mod_DISP8; -- cgit v1.2.3 From aa1a39d1a742c1bb346ba14814d6bf7b44e646cb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 2 Jun 2008 20:46:05 +0900 Subject: rtasm: Use enum sse_cc in sse_cmpps. 
--- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 664a69a537..f4ca282dd9 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -888,7 +888,7 @@ void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg sr void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, - unsigned char cc) + enum sse_cc cc) { DUMP_RRI( dst, src, cc ); emit_2ub(p, X86_TWOB, 0xC2); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index bd76e1729c..af94577aab 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -191,7 +191,7 @@ void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, - unsigned char cc ); + enum sse_cc cc ); void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From f3a7463feefcf1f22c1309e1f5b0bfe381859686 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 1 Sep 2008 15:30:26 -0600 Subject: gallium: include u_pointer,h, not p_pointer.h --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c 
b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index f4ca282dd9..6d4c081e04 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -27,7 +27,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_pointer.h" +#include "util/u_pointer.h" #include "rtasm_execmem.h" #include "rtasm_x86sse.h" -- cgit v1.2.3 From 6607f2cf19d083a979716a341e6e175aef7d6830 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 29 Sep 2008 19:09:39 +0900 Subject: rtasm: Implement immediate group 1 instructions. Fix SIB emition. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 66 +++++++++++++++++++++++++----- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 11 ++--- 2 files changed, 62 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 6d4c081e04..3bba9dcc07 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -240,7 +240,8 @@ static void emit_modrm( struct x86_function *p, /* Oh-oh we've stumbled into the SIB thing. */ if (regmem.file == file_REG32 && - regmem.idx == reg_SP) { + regmem.idx == reg_SP && + regmem.mod != mod_REG) { emit_1ub(p, 0x24); /* simplistic! */ } @@ -435,25 +436,70 @@ void x86_call( struct x86_function *p, struct x86_reg reg) } -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); } -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ) +/** + * Immediate group 1 instructions. 
+ */ +static INLINE void +x86_group1_imm( struct x86_function *p, + unsigned op, struct x86_reg dst, int imm ) { - DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); - emit_1ub(p, 0x80); - emit_modrm_noreg(p, 0, dst); - emit_1ub(p, imm); + if(-0x80 <= imm && imm < 0x80) { + emit_1ub(p, 0x83); + emit_modrm_noreg(p, op, dst); + emit_1b(p, (char)imm); + } + else { + emit_1ub(p, 0x81); + emit_modrm_noreg(p, op, dst); + emit_1i(p, imm); + } +} + +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 0, dst, imm); +} + +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 1, dst, imm); +} + +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 4, dst, imm); +} + +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 5, dst, imm); +} + +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 6, dst, imm); +} + +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 7, dst, imm); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index af94577aab..510aa1b0de 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -152,12 +152,13 @@ void x86_jmp( struct x86_function *p, int label ); /* void x86_call( struct x86_function *p, void (*label)() ); */ void x86_call( struct x86_function *p, struct x86_reg reg); -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. 
- */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ); +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); /* Macro for sse_shufps() and sse2_pshufd(): -- cgit v1.2.3 From 102daee1b8971cf39235e220b9524bec1e4a7089 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 2 Oct 2008 12:46:01 +0100 Subject: rtasm: add prefetch instructions --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 26 ++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 5 +++++ 2 files changed, 31 insertions(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 6d4c081e04..9085f4cc0e 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -629,6 +629,32 @@ void x86_and( struct x86_function *p, * SSE instructions */ +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 0, ptr); +} + +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 1, ptr); +} + +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 2, ptr); +} + + + void sse_movss( struct x86_function *p, struct x86_reg dst, diff --git 
a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index af94577aab..2d7715f965 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -184,6 +184,11 @@ void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg ar void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); + void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From 66d4beb874606baab95fb6539de895eb373b0ccb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 2 Oct 2008 12:46:01 +0100 Subject: rtasm: add prefetch instructions --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 26 ++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 5 +++++ 2 files changed, 31 insertions(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 3bba9dcc07..a5abbcde49 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -675,6 +675,32 @@ void x86_and( struct x86_function *p, * SSE instructions */ +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 0, ptr); +} + +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + 
emit_modrm_noreg(p, 1, ptr); +} + +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 2, ptr); +} + + + void sse_movss( struct x86_function *p, struct x86_reg dst, diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 510aa1b0de..86091e7f6b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -185,6 +185,11 @@ void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg ar void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); + void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From 6965532e14717f71a6f4353fb683c5070c6b7d7a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 3 Oct 2008 13:50:34 +0100 Subject: rtasm: add sse_movntps --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 12 ++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 9085f4cc0e..cc5871f873 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -653,6 +653,18 @@ void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) emit_modrm_noreg(p, 2, ptr); } +void sse_movntps( struct 
x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, reg ); + + assert(dst.mod != mod_REG); + assert(src.mod == mod_REG); + emit_2ub(p, 0x0f, 0x2b); + emit_modrm(p, src, dst); +} + diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 2d7715f965..af79f07dd3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -189,6 +189,8 @@ void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); +void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); + void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From 7053f8c902e904495dffbbf6ea55f414cec780e7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 6 Oct 2008 11:54:22 +0100 Subject: rtasm: fix debug build --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index cc5871f873..dd26d4d9ed 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -657,7 +657,7 @@ void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src) { - DUMP_RR( dst, reg ); + DUMP_RR( dst, src ); assert(dst.mod != mod_REG); assert(src.mod == mod_REG); -- cgit v1.2.3 From f7ee3c979261b4a2b77365b47c7147f69fbfd606 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 6 Oct 2008 18:31:56 -0600 Subject: gallium: replace assertion with conditional/recovery code The assertion failed when 
we ran out of exec memory. Found with conform texcombine test. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary/rtasm/rtasm_x86sse.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index dd26d4d9ed..ad9d8f8ced 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -370,7 +370,11 @@ void x86_jcc( struct x86_function *p, DUMP_I(cc); if (offset < 0) { - assert(p->csr - p->store > -offset); + /*assert(p->csr - p->store > -offset);*/ + if (p->csr - p->store <= -offset) { + /* probably out of memory (using the error_overflow buffer) */ + return; + } } if (offset <= 127 && offset >= -128) { -- cgit v1.2.3