author     Keith Whitwell <keith@tungstengraphics.com>    2005-06-07 12:44:26 +0000
committer  Keith Whitwell <keith@tungstengraphics.com>    2005-06-07 12:44:26 +0000
commit     461a2a799a99a8c8aba7e09c96d7c2e5c3196dcf
tree       44aeed42bc90c9b7ecbba111a15c320c752a4491    /src/mesa/x86/rtasm/x86sse.h
parent     757e0855adb1b1eb45b55e1fcf6acb47224b2853
New files - split off runtime assembly functions from
t_vertex_sse.c.
Diffstat (limited to 'src/mesa/x86/rtasm/x86sse.h')
-rw-r--r--   src/mesa/x86/rtasm/x86sse.h   257
1 file changed, 257 insertions, 0 deletions
diff --git a/src/mesa/x86/rtasm/x86sse.h b/src/mesa/x86/rtasm/x86sse.h
new file mode 100644
index 0000000000..8d48e35647
--- /dev/null
+++ b/src/mesa/x86/rtasm/x86sse.h
@@ -0,0 +1,257 @@
+
+#ifndef _X86SSE_H_
+#define _X86SSE_H_
+
+#if defined(USE_X86_ASM)
+
+#include "glheader.h"
+
+/* It is up to the caller to ensure that instructions issued are
+ * suitable for the host cpu.  There are no checks made in this module
+ * for mmx/sse/sse2 support on the cpu.
+ */
+struct x86_reg {
+   GLuint file:3;
+   GLuint idx:3;
+   GLuint mod:2;   /* mod_REG if this is just a register */
+   GLint disp:24;  /* only +/- 23 bits of offset - should be enough... */
+};
+
+struct x86_function {
+   GLubyte *store;
+   GLubyte *csr;
+   GLuint stack_offset;
+   GLint need_emms;
+};
+
+enum x86_reg_file {
+   file_REG32,
+   file_MMX,
+   file_XMM
+};
+
+/* Values for mod field of modr/m byte
+ */
+enum x86_reg_mod {
+   mod_INDIRECT,
+   mod_DISP8,
+   mod_DISP32,
+   mod_REG
+};
+
+enum x86_reg_name {
+   reg_AX,
+   reg_CX,
+   reg_DX,
+   reg_BX,
+   reg_SP,
+   reg_BP,
+   reg_SI,
+   reg_DI
+};
+
+
+enum x86_cc {
+   cc_O,    /* overflow */
+   cc_NO,   /* not overflow */
+   cc_NAE,  /* not above or equal / carry */
+   cc_AE,   /* above or equal / not carry */
+   cc_E,    /* equal / zero */
+   cc_NE    /* not equal / not zero */
+};
+
+#define cc_Z  cc_E
+#define cc_NZ cc_NE
+
+/* Begin/end/retrieve function creation:
+ */
+
+
+void x86_init_func( struct x86_function *p );
+void x86_release_func( struct x86_function *p );
+void (*x86_get_func( struct x86_function *p ))( void );
+
+
+
+/* Create and manipulate registers and regmem values:
+ */
+struct x86_reg x86_make_reg( enum x86_reg_file file,
+                             enum x86_reg_name idx );
+
+struct x86_reg x86_make_disp( struct x86_reg reg,
+                              GLint disp );
+
+struct x86_reg x86_deref( struct x86_reg reg );
+
+struct x86_reg x86_get_base_reg( struct x86_reg reg );
+
+
+
+
+
+/* Labels, jumps and fixup:
+ */
+GLubyte *x86_get_label( struct x86_function *p );
+
+void x86_jcc( struct x86_function *p,
+              enum x86_cc cc,
+              GLubyte *label );
+
+/* Always use a 32bit offset for forward jumps:
+ */
+GLubyte *x86_jcc_forward( struct x86_function *p,
+                          enum x86_cc cc );
+
+/* Fixup offset from forward jump:
+ */
+void x86_fixup_fwd_jump( struct x86_function *p,
+                         GLubyte *fixup );
+
+void x86_push( struct x86_function *p,
+               struct x86_reg reg );
+
+void x86_pop( struct x86_function *p,
+              struct x86_reg reg );
+
+void x86_inc( struct x86_function *p,
+              struct x86_reg reg );
+
+void x86_dec( struct x86_function *p,
+              struct x86_reg reg );
+
+void x86_ret( struct x86_function *p );
+
+void mmx_emms( struct x86_function *p );
+
+void x86_mov( struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src );
+
+void x86_xor( struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src );
+
+void x86_cmp( struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src );
+
+void sse2_movd( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src );
+
+void mmx_movd( struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src );
+
+void mmx_movq( struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src );
+
+void sse_movss( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src );
+
+void sse_movaps( struct x86_function *p,
+                 struct x86_reg dst,
+                 struct x86_reg src );
+
+void sse_movups( struct x86_function *p,
+                 struct x86_reg dst,
+                 struct x86_reg src );
+
+void sse_movhps( struct x86_function *p,
+                 struct x86_reg dst,
+                 struct x86_reg src );
+
+void sse_movlps( struct x86_function *p,
+                 struct x86_reg dst,
+                 struct x86_reg src );
+
+/* SSE operations often only have one format, with dest constrained to
+ * be a register:
+ */
+void sse_mulps( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src );
+
+void sse_addps( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src );
+
+void sse_movhlps( struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src );
+
+void sse_movlhps( struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src );
+
+void sse2_cvtps2dq( struct x86_function *p,
+                    struct x86_reg dst,
+                    struct x86_reg src );
+
+void sse2_packssdw( struct x86_function *p,
+                    struct x86_reg dst,
+                    struct x86_reg src );
+
+void sse2_packsswb( struct x86_function *p,
+                    struct x86_reg dst,
+                    struct x86_reg src );
+
+void sse2_packuswb( struct x86_function *p,
+                    struct x86_reg dst,
+                    struct x86_reg src );
+
+void sse_cvtps2pi( struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src );
+
+void mmx_packssdw( struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src );
+
+void mmx_packuswb( struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src );
+
+
+/* Load effective address:
+ */
+void x86_lea( struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src );
+
+void x86_test( struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src );
+
+/* Perform a reduced swizzle in a single sse instruction:
+ */
+void sse2_pshufd( struct x86_function *p,
+                  struct x86_reg dest,
+                  struct x86_reg arg0,
+                  GLubyte x,
+                  GLubyte y,
+                  GLubyte z,
+                  GLubyte w );
+
+
+/* Shufps can also be used to implement a reduced swizzle when dest ==
+ * arg0.
+ */
+void sse_shufps( struct x86_function *p,
+                 struct x86_reg dest,
+                 struct x86_reg arg0,
+                 GLubyte x,
+                 GLubyte y,
+                 GLubyte z,
+                 GLubyte w );
+
+
+/* Retrieve a reference to one of the function arguments, taking into
+ * account any push/pop activity:
+ */
+struct x86_reg x86_fn_arg( struct x86_function *p,
+                           GLuint arg );
+
+#endif
+#endif
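
The header above only declares the emitter; as a rough illustration of how the entry points fit together, here is a hypothetical C sketch (not part of this commit) that builds a tiny SSE copy routine at runtime. It assumes the matching x86sse.c implementation introduced by the same change, an SSE-capable host, and the argument access implied by x86_fn_arg(); the builder name and the copy4f typedef are invented for the example.

#include "x86sse.h"

/* Hypothetical example: emit  void copy4f( GLfloat *dst, const GLfloat *src )
 * which copies four unaligned floats through xmm0.
 */
typedef void (*copy4f_func)( GLfloat *dst, const GLfloat *src );

static copy4f_func build_copy4f( struct x86_function *p )
{
   struct x86_reg dst  = x86_make_reg( file_REG32, reg_AX );
   struct x86_reg src  = x86_make_reg( file_REG32, reg_CX );
   struct x86_reg xmm0 = x86_make_reg( file_XMM, 0 );   /* xmm index, not a reg_* name */

   x86_init_func( p );

   /* Fetch the two pointer arguments from the caller's stack frame. */
   x86_mov( p, dst, x86_fn_arg( p, 1 ) );
   x86_mov( p, src, x86_fn_arg( p, 2 ) );

   /* *dst = *src, 16 bytes at a time, unaligned loads/stores. */
   sse_movups( p, xmm0, x86_deref( src ) );
   sse_movups( p, x86_deref( dst ), xmm0 );

   x86_ret( p );

   return (copy4f_func) x86_get_func( p );
}

Conditional code would additionally use x86_get_label()/x86_jcc() for backward branches and the x86_jcc_forward()/x86_fixup_fwd_jump() pair for forward ones, and a generator that emits MMX instructions would call mmx_emms() before x86_ret().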