#ifndef _X86SSE_H_
#define _X86SSE_H_

#if defined(USE_X86_ASM)

#include "glheader.h"

/* It is up to the caller to ensure that instructions issued are
 * suitable for the host cpu.  There are no checks made in this module
 * for mmx/sse/sse2 support on the cpu.
 */
struct x86_reg {
   GLuint file:3;
   GLuint idx:3;
   GLuint mod:2;      /* mod_REG if this is just a register */
   GLint  disp:24;    /* only +/- 23 bits of offset - should be enough... */
};

struct x86_function {
   GLubyte *store;
   GLubyte *csr;
   GLuint stack_offset;
   GLint need_emms;
};

enum x86_reg_file {
   file_REG32,
   file_MMX,
   file_XMM
};

/* Values for mod field of modr/m byte
 */
enum x86_reg_mod {
   mod_INDIRECT,
   mod_DISP8,
   mod_DISP32,
   mod_REG
};

enum x86_reg_name {
   reg_AX,
   reg_CX,
   reg_DX,
   reg_BX,
   reg_SP,
   reg_BP,
   reg_SI,
   reg_DI
};

enum x86_cc {
   cc_O,     /* overflow */
   cc_NO,    /* not overflow */
   cc_NAE,   /* not above or equal / carry */
   cc_AE,    /* above or equal / not carry */
   cc_E,     /* equal / zero */
   cc_NE     /* not equal / not zero */
};

#define cc_Z  cc_E
#define cc_NZ cc_NE


/* Begin/end/retrieve function creation:
 */
void x86_init_func( struct x86_function *p );
void x86_release_func( struct x86_function *p );
void (*x86_get_func( struct x86_function *p ))( void );


/* Create and manipulate registers and regmem values:
 */
struct x86_reg x86_make_reg( enum x86_reg_file file,
                             enum x86_reg_name idx );

struct x86_reg x86_make_disp( struct x86_reg reg,
                              GLint disp );

struct x86_reg x86_deref( struct x86_reg reg );

struct x86_reg x86_get_base_reg( struct x86_reg reg );


/* Labels, jumps and fixup:
 */
GLubyte *x86_get_label( struct x86_function *p );

void x86_jcc( struct x86_function *p,
              enum x86_cc cc,
              GLubyte *label );

/* Always use a 32bit offset for forward jumps:
 */
GLubyte *x86_jcc_forward( struct x86_function *p,
                          enum x86_cc cc );

/* Fixup offset from forward jump:
 */
void x86_fixup_fwd_jump( struct x86_function *p,
                         GLubyte *fixup );

void x86_push( struct x86_function *p, struct x86_reg reg );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_ret( struct x86_function *p );
void mmx_emms( struct x86_function *p );

void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );

/* SSE operations often only have one format, with dest constrained to
 * be a register:
 */
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
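/* Illustrative sketch (not part of the original header; the register
 * choices below are assumptions): the two-operand SSE forms above
 * encode "dst op= src", and the destination must be an XMM register,
 * so a memory operand can only appear as the source.  Squaring the
 * four floats addressed by EAX might be emitted roughly as:
 *
 *    struct x86_reg xmm0 = x86_make_reg( file_XMM, 0 );
 *    struct x86_reg eax  = x86_make_reg( file_REG32, reg_AX );
 *
 *    sse_movups( p, xmm0, x86_deref( eax ) );    (xmm0 = [eax])
 *    sse_mulps( p, xmm0, xmm0 );                 (xmm0 *= xmm0)
 */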
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );

/* Load effective address:
 */
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );

void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );


/* Perform a reduced swizzle in a single sse instruction:
 */
void sse2_pshufd( struct x86_function *p,
                  struct x86_reg dest,
                  struct x86_reg arg0,
                  GLubyte x,
                  GLubyte y,
                  GLubyte z,
                  GLubyte w );

/* Shufps can also be used to implement a reduced swizzle when dest ==
 * arg0.
 */
void sse_shufps( struct x86_function *p,
                 struct x86_reg dest,
                 struct x86_reg arg0,
                 GLubyte x,
                 GLubyte y,
                 GLubyte z,
                 GLubyte w );


/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity:
 */
struct x86_reg x86_fn_arg( struct x86_function *p,
                           GLuint arg );

#endif
#endif
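/* Illustrative usage sketch (an assumption for clarity, not part of the
 * original interface documentation): the typical lifecycle is to
 * initialise a function, emit instructions into it, retrieve and call
 * the generated code, then release it.  For example, a generated
 * routine that just returns zero could be built as:
 *
 *    struct x86_function f;
 *    struct x86_reg eax = x86_make_reg( file_REG32, reg_AX );
 *
 *    x86_init_func( &f );
 *    x86_xor( &f, eax, eax );      (eax = 0, i.e. the return value)
 *    x86_ret( &f );
 *
 *    ((int (*)( void )) x86_get_func( &f ))();
 *    x86_release_func( &f );
 *
 * The host cpu must support whatever instructions are emitted; as noted
 * at the top of this header, no cpu-feature checks are performed here.
 */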