diff options
-rw-r--r-- | src/mesa/tnl/t_vtx_x86_gcc.S | 200 |
1 files changed, 100 insertions, 100 deletions
diff --git a/src/mesa/tnl/t_vtx_x86_gcc.S b/src/mesa/tnl/t_vtx_x86_gcc.S index 1d8682d5e9..bad87d3ee9 100644 --- a/src/mesa/tnl/t_vtx_x86_gcc.S +++ b/src/mesa/tnl/t_vtx_x86_gcc.S @@ -49,7 +49,7 @@ _##x: // Someone who knew a lot about this sort of thing would use this // macro to note current offsets, etc in a special region of the -// object file & just make everything work out neat. I don't know +// object file & just make everything work out neat. I do not know // enough to do that... #define SUBST( x ) (0x10101010 + x) @@ -67,27 +67,27 @@ GLOBL ( _tnl_x86_Vertex1fv ) movl 4(%esp), %ecx push %edi push %esi - movl SUBST(0), %edi // 0x0 --> tnl->vtx.vbptr - movl (%ecx), %edx // load v[0] - movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0] - addl $4, %edi // tnl->vtx.vbptr += 1 - movl $SUBST(1), %ecx // 0x1 --> (tnl->vtx.vertex_size - 1) - movl $SUBST(2), %esi // 0x2 --> (tnl->vtx.vertex + 1) + movl SUBST(0), %edi # 0x0 --> tnl->vtx.vbptr + movl (%ecx), %edx # load v[0] + movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0] + addl $4, %edi # tnl->vtx.vbptr += 1 + movl $SUBST(1), %ecx # 0x1 --> (tnl->vtx.vertex_size - 1) + movl $SUBST(2), %esi # 0x2 --> (tnl->vtx.vertex + 1) repz movsl %ds:(%esi), %es:(%edi) - movl %edi, SUBST(0) // 0x0 --> tnl->vtx.vbptr - movl SUBST(3), %edx // 0x3 --> counter + movl %edi, SUBST(0) # 0x0 --> tnl->vtx.vbptr + movl SUBST(3), %edx # 0x3 --> counter pop %esi pop %edi - dec %edx // counter-- - movl %edx, SUBST(3) // 0x3 --> counter - jne .0 // if (counter != 0) return - pushl $SUBST(4) // 0x4 --> ctx - .byte 0xe8 // call ... - .long SUBST(5) // ... _tnl_wrap_filled_vertex(ctx) + dec %edx # counter-- + movl %edx, SUBST(3) # 0x3 --> counter + jne .0 # if (counter != 0) return + pushl $SUBST(4) # 0x4 --> ctx + .byte 0xe8 # call ... + .long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx) pop %eax .0: - ret // return + ret # return GLOBL ( _tnl_x86_Vertex1fv_end ) @@ -96,29 +96,29 @@ GLOBL ( _tnl_x86_Vertex2fv ) movl 4(%esp), %ecx push %edi push %esi - movl SUBST(0), %edi // load tnl->vtx.vbptr - movl (%ecx), %edx // load v[0] - movl 4(%ecx), %eax // load v[1] - movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0] - movl %eax, 4(%edi) // tnl->vtx.vbptr[1] = v[1] - addl $8, %edi // tnl->vtx.vbptr += 2 - movl $SUBST(1), %ecx // vertex_size - 2 - movl $SUBST(2), %esi // tnl->vtx.vertex + 2 + movl SUBST(0), %edi # load tnl->vtx.vbptr + movl (%ecx), %edx # load v[0] + movl 4(%ecx), %eax # load v[1] + movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0] + movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1] + addl $8, %edi # tnl->vtx.vbptr += 2 + movl $SUBST(1), %ecx # vertex_size - 2 + movl $SUBST(2), %esi # tnl->vtx.vertex + 2 repz movsl %ds:(%esi), %es:(%edi) - movl %edi, SUBST(0) // save tnl->vtx.vbptr - movl SUBST(3), %edx // load counter + movl %edi, SUBST(0) # save tnl->vtx.vbptr + movl SUBST(3), %edx # load counter pop %esi pop %edi - dec %edx // counter-- - movl %edx, SUBST(3) // save counter - jne .1 // if (counter != 0) return - pushl $SUBST(4) // load ctx - .byte 0xe8 // call ... - .long SUBST(5) // ... _tnl_wrap_filled_vertex(ctx) + dec %edx # counter-- + movl %edx, SUBST(3) # save counter + jne .1 # if (counter != 0) return + pushl $SUBST(4) # load ctx + .byte 0xe8 # call ... + .long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx) pop %eax .1: - ret // return + ret # return GLOBL ( _tnl_x86_Vertex2fv_end ) .align 4 @@ -126,31 +126,31 @@ GLOBL ( _tnl_x86_Vertex3fv ) movl 4(%esp), %ecx push %edi push %esi - movl SUBST(0), %edi // load tnl->vtx.vbptr - movl (%ecx), %edx // load v[0] - movl 4(%ecx), %eax // load v[1] - movl 8(%ecx), %esi // load v[2] - movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0] - movl %eax, 4(%edi) // tnl->vtx.vbptr[1] = v[1] - movl %esi, 8(%edi) // tnl->vtx.vbptr[2] = v[2] - addl $12, %edi // tnl->vtx.vbptr += 3 - movl $SUBST(1), %ecx // vertex_size - 3 - movl $SUBST(2), %esi // tnl->vtx.vertex + 3 + movl SUBST(0), %edi # load tnl->vtx.vbptr + movl (%ecx), %edx # load v[0] + movl 4(%ecx), %eax # load v[1] + movl 8(%ecx), %esi # load v[2] + movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0] + movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1] + movl %esi, 8(%edi) # tnl->vtx.vbptr[2] = v[2] + addl $12, %edi # tnl->vtx.vbptr += 3 + movl $SUBST(1), %ecx # vertex_size - 3 + movl $SUBST(2), %esi # tnl->vtx.vertex + 3 repz movsl %ds:(%esi), %es:(%edi) - movl %edi, SUBST(0) // save tnl->vtx.vbptr - movl SUBST(3), %edx // load counter + movl %edi, SUBST(0) # save tnl->vtx.vbptr + movl SUBST(3), %edx # load counter pop %esi pop %edi - dec %edx // counter-- - movl %edx, SUBST(3) // save counter - jne .2 // if (counter != 0) return - pushl $SUBST(4) // load ctx - .byte 0xe8 // call ... - .long SUBST(5) // ... _tnl_wrap_filled_vertex(ctx) + dec %edx # counter-- + movl %edx, SUBST(3) # save counter + jne .2 # if (counter != 0) return + pushl $SUBST(4) # load ctx + .byte 0xe8 # call ... + .long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx) pop %eax .2: - ret // return + ret # return GLOBL ( _tnl_x86_Vertex3fv_end ) @@ -159,33 +159,33 @@ GLOBL ( _tnl_x86_Vertex4fv ) movl 4(%esp), %ecx push %edi push %esi - movl SUBST(0), %edi // load tnl->vtx.vbptr - movl (%ecx), %edx // load v[0] - movl 4(%ecx), %eax // load v[1] - movl 8(%ecx), %esi // load v[2] - movl 12(%ecx), %ecx // load v[3] - movl %edx, (%edi) // tnl->vtx.vbptr[0] = v[0] - movl %eax, 4(%edi) // tnl->vtx.vbptr[1] = v[1] - movl %esi, 8(%edi) // tnl->vtx.vbptr[2] = v[2] - movl %ecx, 12(%edi) // tnl->vtx.vbptr[3] = v[3] - addl $16, %edi // tnl->vtx.vbptr += 4 - movl $SUBST(1), %ecx // vertex_size - 4 - movl $SUBST(2), %esi // tnl->vtx.vertex + 3 + movl SUBST(0), %edi # load tnl->vtx.vbptr + movl (%ecx), %edx # load v[0] + movl 4(%ecx), %eax # load v[1] + movl 8(%ecx), %esi # load v[2] + movl 12(%ecx), %ecx # load v[3] + movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0] + movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1] + movl %esi, 8(%edi) # tnl->vtx.vbptr[2] = v[2] + movl %ecx, 12(%edi) # tnl->vtx.vbptr[3] = v[3] + addl $16, %edi # tnl->vtx.vbptr += 4 + movl $SUBST(1), %ecx # vertex_size - 4 + movl $SUBST(2), %esi # tnl->vtx.vertex + 3 repz movsl %ds:(%esi), %es:(%edi) - movl %edi, SUBST(0) // save tnl->vtx.vbptr - movl SUBST(3), %edx // load counter + movl %edi, SUBST(0) # save tnl->vtx.vbptr + movl SUBST(3), %edx # load counter pop %esi pop %edi - dec %edx // counter-- - movl %edx, SUBST(3) // save counter - jne .3 // if (counter != 0) return - pushl $SUBST(4) // load ctx - .byte 0xe8 // call ... - .long SUBST(5) // ... _tnl_wrap_filled_vertex(ctx) + dec %edx # counter-- + movl %edx, SUBST(3) # save counter + jne .3 # if (counter != 0) return + pushl $SUBST(4) # load ctx + .byte 0xe8 # call ... + .long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx) pop %eax .3: - ret // return + ret # return GLOBL ( _tnl_x86_Vertex4fv_end ) @@ -243,13 +243,13 @@ GLOBL ( _tnl_x86_Attribute4fv_end ) GLOBL( _tnl_x86_choose_fv) - subl $12, %esp // gcc does 16 byte alignment of stack frames? - movl $SUBST(0), (%esp) // arg 0 - attrib - movl $SUBST(1), 4(%esp) // arg 1 - N - .byte 0xe8 // call ... - .long SUBST(2) // ... do_choose - add $12, %esp // tear down stack frame - jmp *%eax // jump to new func + subl $12, %esp # gcc does 16 byte alignment of stack frames? + movl $SUBST(0), (%esp) # arg 0 - attrib + movl $SUBST(1), 4(%esp) # arg 1 - N + .byte 0xe8 # call ... + .long SUBST(2) # ... do_choose + add $12, %esp # tear down stack frame + jmp *%eax # jump to new func GLOBL ( _tnl_x86_choose_fv_end ) @@ -261,7 +261,7 @@ GLOBL ( _tnl_x86_choose_fv_end ) // In the 1st level dispatch functions, switch to a different // calling convention -- (const GLfloat *v) in %ecx. // -// As with regular (x86) dispatch, don't create a new stack frame - +// As with regular (x86) dispatch, do not create a new stack frame - // just let the 'ret' in the dispatched function return straight // back to the original caller. @@ -274,18 +274,18 @@ GLOBL ( _tnl_x86_choose_fv_end ) // Unfortunately, have to play with the stack in the non-fv case: // GLOBL( _tnl_x86_dispatch_attrf ) - subl $12, %esp // gcc does 16 byte alignment of stack frames? - leal 16(%esp), %edx // address of first float on stack - movl %edx, (%esp) // save as 'v' - call *SUBST(0) // 0x0 --> tabfv[attr][n] - addl $12, %esp // tear down frame - ret // return + subl $12, %esp # gcc does 16 byte alignment of stack frames? + leal 16(%esp), %edx # address of first float on stack + movl %edx, (%esp) # save as 'v' + call *SUBST(0) # 0x0 --> tabfv[attr][n] + addl $12, %esp # tear down frame + ret # return GLOBL( _tnl_x86_dispatch_attrf_end ) // The fv case is simpler: // GLOBL( _tnl_x86_dispatch_attrfv ) - jmp *SUBST(0) // 0x0 --> tabfv[attr][n] + jmp *SUBST(0) # 0x0 --> tabfv[attr][n] GLOBL( _tnl_x86_dispatch_attrfv_end ) @@ -296,9 +296,9 @@ GLOBL( _tnl_x86_dispatch_attrfv_end ) // [dBorca] // right, this would be the preferred approach, but gcc does not // clean up the stack after each function call when optimizing (-fdefer-pop); -// can it make assumptions about what's already on the stack? I dunno, +// can it make assumptions about what is already on the stack? I dunno, // but in this case, we can't mess with the caller's stack frame, and -// we must use a model like `_x86_dispatch_attrfv' above. Caveat emptor! +// we must use a model like '_x86_dispatch_attrfv' above. Caveat emptor! // Also, will only need a maximum of four of each of these per context: // @@ -308,7 +308,7 @@ GLOBL( _tnl_x86_dispatch_multitexcoordf ) andl $7, %ecx movl %edx, 4(%esp) sall $4, %ecx - jmp *SUBST(0)(%ecx) // 0x0 - tabfv[tex0][n] + jmp *SUBST(0)(%ecx) # 0x0 - tabfv[tex0][n] GLOBL( _tnl_x86_dispatch_multitexcoordf_end ) GLOBL( _tnl_x86_dispatch_multitexcoordfv ) @@ -317,7 +317,7 @@ GLOBL( _tnl_x86_dispatch_multitexcoordfv ) andl $7, %ecx movl %edx, 4(%esp) sall $4, %ecx - jmp *SUBST(0)(%ecx) // 0x0 - tabfv[tex0][n] + jmp *SUBST(0)(%ecx) # 0x0 - tabfv[tex0][n] GLOBL( _tnl_x86_dispatch_multitexcoordfv_end ) // VertexAttrib: the address of the function pointer must be @@ -326,23 +326,23 @@ GLOBL( _tnl_x86_dispatch_multitexcoordfv_end ) GLOBL( _tnl_x86_dispatch_vertexattribf ) movl 4(%esp), %eax cmpl $16, %eax - jb .8 // "cmovge" is not supported on all CPUs + jb .8 # "cmovge" is not supported on all CPUs movl $16, %eax .8: - leal 8(%esp), %ecx // calculate 'v' - movl %ecx, 4(%esp) // save in 1st arg slot + leal 8(%esp), %ecx # calculate 'v' + movl %ecx, 4(%esp) # save in 1st arg slot sall $4, %eax - jmp *SUBST(0)(%eax) // 0x0 - tabfv[0][n] + jmp *SUBST(0)(%eax) # 0x0 - tabfv[0][n] GLOBL( _tnl_x86_dispatch_vertexattribf_end ) GLOBL( _tnl_x86_dispatch_vertexattribfv ) movl 4(%esp), %eax cmpl $16, %eax - jb .9 // "cmovge" is not supported on all CPUs + jb .9 # "cmovge" is not supported on all CPUs movl $16, %eax .9: - movl 8(%esp), %ecx // load 'v' - movl %ecx, 4(%esp) // save in 1st arg slot + movl 8(%esp), %ecx # load 'v' + movl %ecx, 4(%esp) # save in 1st arg slot sall $4, %eax - jmp *SUBST(0)(%eax) // 0x0 - tabfv[0][n] + jmp *SUBST(0)(%eax) # 0x0 - tabfv[0][n] GLOBL( _tnl_x86_dispatch_vertexattribfv_end ) |