summary | refs | log | tree | commit | diff
path: root/src/mesa/tnl/t_vtx_x86_gcc.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/tnl/t_vtx_x86_gcc.S')
-rw-r--r-- src/mesa/tnl/t_vtx_x86_gcc.S 124
1 files changed, 64 insertions, 60 deletions
diff --git a/src/mesa/tnl/t_vtx_x86_gcc.S b/src/mesa/tnl/t_vtx_x86_gcc.S
index 2a2e933f97..5a1adc0f33 100644
--- a/src/mesa/tnl/t_vtx_x86_gcc.S
+++ b/src/mesa/tnl/t_vtx_x86_gcc.S
@@ -36,16 +36,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
.globl x; \
x:
-#define EXTRN( x ) x
-
#else /* defined(__DJGPP__) */
#define GLOBL( x ) \
.globl _##x; \
_##x:
-#define EXTRN( x ) _##x
-
#endif /* defined(__DJGPP__) */
.data
@@ -55,17 +51,22 @@ _##x:
// macro to note current offsets, etc in a special region of the
// object file & just make everything work out neat. I don't know
// enough to do that...
-
-#define SUBST( x ) (0x10101010 + x)
-
+
+#define SUBST( x ) (0x10101010 + x)
+
// [dBorca] TODO
// Unfold functions for each vertex size?
// Build super-specialized MMX/SSE versions?
+// STDCALL woes (HAVE_NONSTANDARD_GLAPIENTRY):
+// need separate routine for the non "fv" case,
+// to clean up the stack (I guess we could codegen
+// 'ret nn' insn)! Also we need to call notify, then
+// return, instead of jump!
GLOBL ( _tnl_x86_Vertex1fv )
- movl 4(%esp), %ecx
+ movl 4(%esp), %ecx
push %edi
push %esi
movl SUBST(0), %edi // 0x0 --> tnl->vtx.vbptr
@@ -75,7 +76,7 @@ GLOBL ( _tnl_x86_Vertex1fv )
movl $SUBST(1), %ecx // 0x1 --> (tnl->vtx.vertex_size - 1)
movl $SUBST(2), %esi // 0x2 --> (tnl->vtx.vertex + 1)
repz
- movsl %ds:(%esi), %es:(%edi)
+ movsl %ds:(%esi), %es:(%edi)
movl %edi, SUBST(0) // 0x0 --> tnl->vtx.vbptr
movl SUBST(3), %edx // 0x3 --> counter
pop %esi
@@ -90,7 +91,7 @@ GLOBL ( _tnl_x86_Vertex1fv_end )
.align 4
GLOBL ( _tnl_x86_Vertex2fv )
- movl 4(%esp), %ecx
+ movl 4(%esp), %ecx
push %edi
push %esi
movl SUBST(0), %edi // load tnl->vtx.vbptr
@@ -102,7 +103,7 @@ GLOBL ( _tnl_x86_Vertex2fv )
movl $SUBST(1), %ecx // vertex_size - 2
movl $SUBST(2), %esi // tnl->vtx.vertex + 2
repz
- movsl %ds:(%esi), %es:(%edi)
+ movsl %ds:(%esi), %es:(%edi)
movl %edi, SUBST(0) // save tnl->vtx.vbptr
movl SUBST(3), %edx // load counter
pop %esi
@@ -116,7 +117,7 @@ GLOBL ( _tnl_x86_Vertex2fv_end )
.align 4
GLOBL ( _tnl_x86_Vertex3fv )
- movl 4(%esp), %ecx
+ movl 4(%esp), %ecx
push %edi
push %esi
movl SUBST(0), %edi // load tnl->vtx.vbptr
@@ -130,7 +131,7 @@ GLOBL ( _tnl_x86_Vertex3fv )
movl $SUBST(1), %ecx // vertex_size - 3
movl $SUBST(2), %esi // tnl->vtx.vertex + 3
repz
- movsl %ds:(%esi), %es:(%edi)
+ movsl %ds:(%esi), %es:(%edi)
movl %edi, SUBST(0) // save tnl->vtx.vbptr
movl SUBST(3), %edx // load counter
pop %esi
@@ -142,10 +143,10 @@ GLOBL ( _tnl_x86_Vertex3fv )
ret // return
GLOBL ( _tnl_x86_Vertex3fv_end )
-
+
.align 4
GLOBL ( _tnl_x86_Vertex4fv )
- movl 4(%esp), %ecx
+ movl 4(%esp), %ecx
push %edi
push %esi
movl SUBST(0), %edi // load tnl->vtx.vbptr
@@ -161,7 +162,7 @@ GLOBL ( _tnl_x86_Vertex4fv )
movl $SUBST(1), %ecx // vertex_size - 4
movl $SUBST(2), %esi // tnl->vtx.vertex + 4
repz
- movsl %ds:(%esi), %es:(%edi)
+ movsl %ds:(%esi), %es:(%edi)
movl %edi, SUBST(0) // save tnl->vtx.vbptr
movl SUBST(3), %edx // load counter
pop %esi
@@ -174,49 +175,49 @@ GLOBL ( _tnl_x86_Vertex4fv )
GLOBL ( _tnl_x86_Vertex4fv_end )
-
+
/**
* Generic handlers for vector format data.
*/
GLOBL( _tnl_x86_Attribute1fv)
- movl 4(%esp), %ecx
- movl (%ecx), %eax /* load v[0] */
- movl %eax, SUBST(0) /* store v[0] to current vertex */
+ movl 4(%esp), %ecx
+ movl (%ecx), %eax /* load v[0] */
+ movl %eax, SUBST(0) /* store v[0] to current vertex */
ret
GLOBL ( _tnl_x86_Attribute1fv_end )
GLOBL( _tnl_x86_Attribute2fv)
- movl 4(%esp), %ecx
- movl (%ecx), %eax /* load v[0] */
- movl 4(%ecx), %edx /* load v[1] */
- movl %eax, SUBST(0) /* store v[0] to current vertex */
- movl %edx, SUBST(1) /* store v[1] to current vertex */
+ movl 4(%esp), %ecx
+ movl (%ecx), %eax /* load v[0] */
+ movl 4(%ecx), %edx /* load v[1] */
+ movl %eax, SUBST(0) /* store v[0] to current vertex */
+ movl %edx, SUBST(1) /* store v[1] to current vertex */
ret
GLOBL ( _tnl_x86_Attribute2fv_end )
GLOBL( _tnl_x86_Attribute3fv)
- movl 4(%esp), %ecx
- movl (%ecx), %eax /* load v[0] */
- movl 4(%ecx), %edx /* load v[1] */
- movl 8(%ecx), %ecx /* load v[2] */
- movl %eax, SUBST(0) /* store v[0] to current vertex */
- movl %edx, SUBST(1) /* store v[1] to current vertex */
- movl %ecx, SUBST(2) /* store v[2] to current vertex */
+ movl 4(%esp), %ecx
+ movl (%ecx), %eax /* load v[0] */
+ movl 4(%ecx), %edx /* load v[1] */
+ movl 8(%ecx), %ecx /* load v[2] */
+ movl %eax, SUBST(0) /* store v[0] to current vertex */
+ movl %edx, SUBST(1) /* store v[1] to current vertex */
+ movl %ecx, SUBST(2) /* store v[2] to current vertex */
ret
GLOBL ( _tnl_x86_Attribute3fv_end )
GLOBL( _tnl_x86_Attribute4fv)
- movl 4(%esp), %ecx
- movl (%ecx), %eax /* load v[0] */
- movl 4(%ecx), %edx /* load v[1] */
- movl %eax, SUBST(0) /* store v[0] to current vertex */
- movl %edx, SUBST(1) /* store v[1] to current vertex */
- movl 8(%ecx), %eax /* load v[2] */
- movl 12(%ecx), %edx /* load v[3] */
- movl %eax, SUBST(2) /* store v[2] to current vertex */
- movl %edx, SUBST(3) /* store v[3] to current vertex */
+ movl 4(%esp), %ecx
+ movl (%ecx), %eax /* load v[0] */
+ movl 4(%ecx), %edx /* load v[1] */
+ movl %eax, SUBST(0) /* store v[0] to current vertex */
+ movl %edx, SUBST(1) /* store v[1] to current vertex */
+ movl 8(%ecx), %eax /* load v[2] */
+ movl 12(%ecx), %edx /* load v[3] */
+ movl %eax, SUBST(2) /* store v[2] to current vertex */
+ movl %edx, SUBST(3) /* store v[3] to current vertex */
ret
GLOBL ( _tnl_x86_Attribute4fv_end )
@@ -225,29 +226,24 @@ GLOBL ( _tnl_x86_Attribute4fv_end )
// Must generate all of these ahead of first usage. Generate at
// compile-time?
-
-// NOT CURRENTLY USED
GLOBL( _tnl_x86_choose_fv)
subl $12, %esp // gcc does 16 byte alignment of stack frames?
movl $SUBST(0), (%esp) // arg 0 - attrib
movl $SUBST(1), 4(%esp) // arg 1 - N
- call EXTRN(_do_choose) // new function returned in %eax
- add $12, %esp // tear down stack frame
- jmp *%eax // jump to new func
-GLOBL ( _tnl_x86_choosefv_end )
-
-
+ .byte 0xe8 // call ...
+ .long SUBST(2) // ... do_choose
+ add $12, %esp // tear down stack frame
+ jmp *%eax // jump to new func
+GLOBL ( _tnl_x86_choose_fv_end )
+
// FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
-
-// NOT CURRENTLY USED
-
-
+
// In the 1st level dispatch functions, switch to a different
// calling convention -- (const GLfloat *v) in %ecx.
//
@@ -256,7 +252,7 @@ GLOBL ( _tnl_x86_choosefv_end )
// back to the original caller.
-
+
// Vertex/Normal/Color, etc: the address of the function pointer
// is known at codegen time.
@@ -282,6 +278,13 @@ GLOBL( _tnl_x86_dispatch_attrfv_end )
// MultiTexcoord: the address of the function pointer must be
// calculated, but can use the index argument slot to hold 'v', and
// avoid setting up a new stack frame.
+//
+// [dBorca]
+// right, this would be the preferred approach, but gcc does not
+// clean up the stack after each function call when optimizing (-fdefer-pop);
+// can it make assumptions about what's already on the stack? I dunno,
+// but in this case, we can't mess with the caller's stack frame, and
+// we must use a model like `_tnl_x86_dispatch_attrfv' above. Caveat emptor!
// Also, will only need a maximum of four of each of these per context:
//
@@ -302,15 +305,16 @@ GLOBL( _tnl_x86_dispatch_multitexcoordfv )
sall $4, %ecx
jmp *SUBST(0)(%ecx) // 0x0 - tabfv[tex0][n]
GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
-
+
// VertexAttrib: the address of the function pointer must be
// calculated.
GLOBL( _tnl_x86_dispatch_vertexattribf )
- movl $16, %ecx
movl 4(%esp), %eax
cmpl $16, %eax
- cmovge %ecx, %eax // [dBorca] BADBAD! might not be supported
+ jb .0 // "cmovge" is not supported on all CPUs
+ movl $16, %eax
+.0:
leal 8(%esp), %ecx // calculate 'v'
movl %ecx, 4(%esp) // save in 1st arg slot
sall $4, %eax
@@ -318,13 +322,13 @@ GLOBL( _tnl_x86_dispatch_vertexattribf )
GLOBL( _tnl_x86_dispatch_vertexattribf_end )
GLOBL( _tnl_x86_dispatch_vertexattribfv )
- movl $16, %ecx
movl 4(%esp), %eax
cmpl $16, %eax
- cmovge %ecx, %eax // [dBorca] BADBAD! might not be supported
+ jb .1 // "cmovge" is not supported on all CPUs
+ movl $16, %eax
+.1:
movl 8(%esp), %ecx // load 'v'
movl %ecx, 4(%esp) // save in 1st arg slot
sall $4, %eax
jmp *SUBST(0)(%eax) // 0x0 - tabfv[0][n]
GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
-