summaryrefslogtreecommitdiff
path: root/src/mesa/sparc
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/sparc')
-rw-r--r--src/mesa/sparc/clip.S233
-rw-r--r--src/mesa/sparc/norm.S605
-rw-r--r--src/mesa/sparc/sparc.c142
-rw-r--r--src/mesa/sparc/sparc.h36
-rw-r--r--src/mesa/sparc/sparc_matrix.h170
-rw-r--r--src/mesa/sparc/xform.S1392
6 files changed, 2578 insertions, 0 deletions
diff --git a/src/mesa/sparc/clip.S b/src/mesa/sparc/clip.S
new file mode 100644
index 0000000000..dc239171ff
--- /dev/null
+++ b/src/mesa/sparc/clip.S
@@ -0,0 +1,233 @@
+/*
+ * Clip testing in SPARC assembly
+ */
+
+#if __arch64__
+#define LDPTR ldx
+#define V4F_DATA 0x00
+#define V4F_START 0x08
+#define V4F_COUNT 0x10
+#define V4F_STRIDE 0x14
+#define V4F_SIZE 0x18
+#define V4F_FLAGS 0x1c
+#else
+#define LDPTR ld
+#define V4F_DATA 0x00
+#define V4F_START 0x04
+#define V4F_COUNT 0x08
+#define V4F_STRIDE 0x0c
+#define V4F_SIZE 0x10
+#define V4F_FLAGS 0x14
+#endif
+
+#define VEC_SIZE_1 1
+#define VEC_SIZE_2 3
+#define VEC_SIZE_3 7
+#define VEC_SIZE_4 15
+
+ .register %g2, #scratch
+ .register %g3, #scratch
+
+ .text
+ .align 64
+
+one_dot_zero:
+ .word 0x3f800000 /* 1.0f */
+
+ /* This trick is shamelessly stolen from the x86
+ * Mesa asm. Very clever, and we can do it too
+ * since we have the necessary add with carry
+ * instructions on Sparc.
+ */
+clip_table:
+ .byte 0, 1, 0, 2, 4, 5, 4, 6
+ .byte 0, 1, 0, 2, 8, 9, 8, 10
+ .byte 32, 33, 32, 34, 36, 37, 36, 38
+ .byte 32, 33, 32, 34, 40, 41, 40, 42
+ .byte 0, 1, 0, 2, 4, 5, 4, 6
+ .byte 0, 1, 0, 2, 8, 9, 8, 10
+ .byte 16, 17, 16, 18, 20, 21, 20, 22
+ .byte 16, 17, 16, 18, 24, 25, 24, 26
+ .byte 63, 61, 63, 62, 55, 53, 55, 54
+ .byte 63, 61, 63, 62, 59, 57, 59, 58
+ .byte 47, 45, 47, 46, 39, 37, 39, 38
+ .byte 47, 45, 47, 46, 43, 41, 43, 42
+ .byte 63, 61, 63, 62, 55, 53, 55, 54
+ .byte 63, 61, 63, 62, 59, 57, 59, 58
+ .byte 31, 29, 31, 30, 23, 21, 23, 22
+ .byte 31, 29, 31, 30, 27, 25, 27, 26
+
+/* GLvector4f *clip_vec, GLvector4f *proj_vec,
+ GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
+ GLboolean viewport_z_enable */
+
+ .align 64
+__pc_tramp:
+ retl
+ nop
+
+ .globl _mesa_sparc_cliptest_points4
+_mesa_sparc_cliptest_points4:
+ save %sp, -64, %sp
+ call __pc_tramp
+ sub %o7, (. - one_dot_zero - 4), %g1
+ ld [%g1 + 0x0], %f4
+ add %g1, 0x4, %g1
+
+ ld [%i0 + V4F_STRIDE], %l1
+ ld [%i0 + V4F_COUNT], %l3
+ LDPTR [%i0 + V4F_START], %i0
+ LDPTR [%i1 + V4F_START], %i5
+ ldub [%i3], %g2
+ ldub [%i4], %g3
+ sll %g3, 8, %g3
+ or %g2, %g3, %g2
+
+ ld [%i1 + V4F_FLAGS], %g3
+ or %g3, VEC_SIZE_4, %g3
+ st %g3, [%i1 + V4F_FLAGS]
+ mov 3, %g3
+ st %g3, [%i1 + V4F_SIZE]
+ st %l3, [%i1 + V4F_COUNT]
+ clr %l2
+ clr %l0
+
+ /* l0: i
+ * l3: count
+ * l1: stride
+ * l2: c
+ * g2: (tmpAndMask << 8) | tmpOrMask
+ * g1: clip_table
+ * i0: from[stride][i]
+ * i2: clipMask
+ * i5: vProj[4][i]
+ */
+
+1: ld [%i0 + 0x0c], %f3 ! LSU Group
+ ld [%i0 + 0x0c], %g5 ! LSU Group
+ ld [%i0 + 0x08], %g4 ! LSU Group
+ fdivs %f4, %f3, %f8 ! FGM
+ addcc %g5, %g5, %g5 ! IEU1 Group
+ addx %g0, 0x0, %g3 ! IEU1 Group
+ addcc %g4, %g4, %g4 ! IEU1 Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ subcc %g5, %g4, %g0 ! IEU1 Group
+ ld [%i0 + 0x04], %g4 ! LSU Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ addcc %g4, %g4, %g4 ! IEU1 Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ subcc %g5, %g4, %g0 ! IEU1 Group
+ ld [%i0 + 0x00], %g4 ! LSU Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ addcc %g4, %g4, %g4 ! IEU1 Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ subcc %g5, %g4, %g0 ! IEU1 Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ ldub [%g1 + %g3], %g3 ! LSU Group
+ cmp %g3, 0 ! IEU1 Group, stall
+ be 2f ! CTI
+ stb %g3, [%i2] ! LSU
+ sll %g3, 8, %g4 ! IEU1 Group
+ add %l2, 1, %l2 ! IEU0
+ st %g0, [%i5 + 0x00] ! LSU
+ or %g4, 0xff, %g4 ! IEU0 Group
+ or %g2, %g3, %g2 ! IEU1
+ st %g0, [%i5 + 0x04] ! LSU
+ and %g2, %g4, %g2 ! IEU0 Group
+ st %g0, [%i5 + 0x08] ! LSU
+ b 3f ! CTI
+ st %f4, [%i5 + 0x0c] ! LSU Group
+2: ld [%i0 + 0x00], %f0 ! LSU Group
+ ld [%i0 + 0x04], %f1 ! LSU Group
+ ld [%i0 + 0x08], %f2 ! LSU Group
+ fmuls %f0, %f8, %f0 ! FGM
+ st %f0, [%i5 + 0x00] ! LSU Group
+ fmuls %f1, %f8, %f1 ! FGM
+ st %f1, [%i5 + 0x04] ! LSU Group
+ fmuls %f2, %f8, %f2 ! FGM
+ st %f2, [%i5 + 0x08] ! LSU Group
+ st %f8, [%i5 + 0x0c] ! LSU Group
+3: add %i5, 0x10, %i5 ! IEU1
+ add %l0, 1, %l0 ! IEU0 Group
+ add %i2, 1, %i2 ! IEU0 Group
+ cmp %l0, %l3 ! IEU1 Group
+ bne 1b ! CTI
+ add %i0, %l1, %i0 ! IEU0 Group
+ stb %g2, [%i3] ! LSU
+ srl %g2, 8, %g3 ! IEU0 Group
+ cmp %l2, %l3 ! IEU1 Group
+ bl,a 1f ! CTI
+ clr %g3 ! IEU0
+1: stb %g3, [%i4] ! LSU Group
+ ret ! CTI Group
+ restore %i1, 0x0, %o0
+
+ .globl _mesa_sparc_cliptest_points4_np
+_mesa_sparc_cliptest_points4_np:
+ save %sp, -64, %sp
+
+ call __pc_tramp
+ sub %o7, (. - one_dot_zero - 4), %g1
+ add %g1, 0x4, %g1
+
+ ld [%i0 + V4F_STRIDE], %l1
+ ld [%i0 + V4F_COUNT], %l3
+ LDPTR [%i0 + V4F_START], %i0
+ ldub [%i3], %g2
+ ldub [%i4], %g3
+ sll %g3, 8, %g3
+ or %g2, %g3, %g2
+
+ clr %l2
+ clr %l0
+
+ /* l0: i
+ * l3: count
+ * l1: stride
+ * l2: c
+ * g2: (tmpAndMask << 8) | tmpOrMask
+ * g1: clip_table
+ * i0: from[stride][i]
+ * i2: clipMask
+ */
+
+1: ld [%i0 + 0x0c], %g5 ! LSU Group
+ ld [%i0 + 0x08], %g4 ! LSU Group
+ addcc %g5, %g5, %g5 ! IEU1 Group
+ addx %g0, 0x0, %g3 ! IEU1 Group
+ addcc %g4, %g4, %g4 ! IEU1 Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ subcc %g5, %g4, %g0 ! IEU1 Group
+ ld [%i0 + 0x04], %g4 ! LSU Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ addcc %g4, %g4, %g4 ! IEU1 Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ subcc %g5, %g4, %g0 ! IEU1 Group
+ ld [%i0 + 0x00], %g4 ! LSU Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ addcc %g4, %g4, %g4 ! IEU1 Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ subcc %g5, %g4, %g0 ! IEU1 Group
+ addx %g3, %g3, %g3 ! IEU1 Group
+ ldub [%g1 + %g3], %g3 ! LSU Group
+ cmp %g3, 0 ! IEU1 Group, stall
+ be 2f ! CTI
+ stb %g3, [%i2] ! LSU
+ sll %g3, 8, %g4 ! IEU1 Group
+ add %l2, 1, %l2 ! IEU0
+ or %g4, 0xff, %g4 ! IEU0 Group
+ or %g2, %g3, %g2 ! IEU1
+ and %g2, %g4, %g2 ! IEU0 Group
+2: add %l0, 1, %l0 ! IEU0 Group
+ add %i2, 1, %i2 ! IEU0 Group
+ cmp %l0, %l3 ! IEU1 Group
+ bne 1b ! CTI
+ add %i0, %l1, %i0 ! IEU0 Group
+ stb %g2, [%i3] ! LSU
+ srl %g2, 8, %g3 ! IEU0 Group
+ cmp %l2, %l3 ! IEU1 Group
+ bl,a 1f ! CTI
+ clr %g3 ! IEU0
+1: stb %g3, [%i4] ! LSU Group
+ ret ! CTI Group
+ restore %i1, 0x0, %o0
diff --git a/src/mesa/sparc/norm.S b/src/mesa/sparc/norm.S
new file mode 100644
index 0000000000..117d36fa22
--- /dev/null
+++ b/src/mesa/sparc/norm.S
@@ -0,0 +1,605 @@
+
+#include "sparc_matrix.h"
+
+ .register %g2, #scratch
+ .register %g3, #scratch
+
+ .text
+
+#ifdef __arch64__
+#define STACK_VAR_OFF (2047 + (8 * 16))
+#else
+#define STACK_VAR_OFF (4 * 16)
+#endif
+
+ /* Newton-Raphson approximation turns out to be slower
+ * (and less accurate) than direct fsqrts/fdivs.
+ */
+#define ONE_DOT_ZERO 0x3f800000
+
+ .globl _mesa_sparc_transform_normalize_normals
+_mesa_sparc_transform_normalize_normals:
+ /* o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+ sethi %hi(ONE_DOT_ZERO), %g2
+ sub %sp, 16, %sp
+ st %g2, [%sp + STACK_VAR_OFF+0x0]
+ st %o1, [%sp + STACK_VAR_OFF+0x4]
+ ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
+ ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
+ add %sp, 16, %sp
+
+ LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
+ LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
+ ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
+ ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
+ LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
+
+ LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
+
+ /* dest->count = in->count */
+ st %g1, [%o4 + V4F_COUNT]
+
+ cmp %g1, 1
+ bl 7f
+ cmp %o3, 0
+ bne 4f
+ clr %o4 ! 'i' for STRIDE_LOOP
+
+1: /* LENGTHS == NULL */
+ ld [%o5 + 0x00], %f0 ! ux = from[0]
+ ld [%o5 + 0x04], %f1 ! uy = from[1]
+ ld [%o5 + 0x08], %f2 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
+ * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
+ * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
+ */
+ fmuls %f0, M0, %f3 ! FGM Group
+ fmuls %f1, M1, %f4 ! FGM Group
+ fmuls %f0, M4, %f5 ! FGM Group
+ fmuls %f1, M5, %f6 ! FGM Group
+ fmuls %f0, M8, %f7 ! FGM Group f3 available
+ fmuls %f1, M9, %f8 ! FGM Group f4 available
+ fadds %f3, %f4, %f3 ! FGA
+ fmuls %f2, M2, %f10 ! FGM Group f5 available
+ fmuls %f2, M6, %f0 ! FGM Group f6 available
+ fadds %f5, %f6, %f5 ! FGA
+ fmuls %f2, M10, %f4 ! FGM Group f7 available
+ fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
+ fadds %f3, %f10, %f3 ! FGA Group f10 available
+ fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
+ fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
+
+ /* f3=tx, f5=ty, f7=tz */
+
+ /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
+ fmuls %f3, %f3, %f6 ! FGM Group f3 available
+ fmuls %f5, %f5, %f8 ! FGM Group f5 available
+ fmuls %f7, %f7, %f10 ! FGM Group f7 available
+ fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
+ fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
+
+ /* scale (f6) = 1.0 / sqrt(len) */
+ fsqrts %f6, %f6 ! FDIV 20 cycles
+ fdivs %f12, %f6, %f6 ! FDIV 14 cycles
+
+ fmuls %f3, %f6, %f3
+ st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
+ fmuls %f5, %f6, %f5
+ st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
+ fmuls %f7, %f6, %f7
+ st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 1b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+ ba 7f
+ nop
+
+4: /* LENGTHS != NULL */
+ fmuls M0, %f15, M0
+ fmuls M1, %f15, M1
+ fmuls M2, %f15, M2
+ fmuls M4, %f15, M4
+ fmuls M5, %f15, M5
+ fmuls M6, %f15, M6
+ fmuls M8, %f15, M8
+ fmuls M9, %f15, M9
+ fmuls M10, %f15, M10
+
+5:
+ ld [%o5 + 0x00], %f0 ! ux = from[0]
+ ld [%o5 + 0x04], %f1 ! uy = from[1]
+ ld [%o5 + 0x08], %f2 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ /* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
+ * ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
+ * tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
+ */
+ fmuls %f0, M0, %f3 ! FGM Group
+ fmuls %f1, M1, %f4 ! FGM Group
+ fmuls %f0, M4, %f5 ! FGM Group
+ fmuls %f1, M5, %f6 ! FGM Group
+ fmuls %f0, M8, %f7 ! FGM Group f3 available
+ fmuls %f1, M9, %f8 ! FGM Group f4 available
+ fadds %f3, %f4, %f3 ! FGA
+ fmuls %f2, M2, %f10 ! FGM Group f5 available
+ fmuls %f2, M6, %f0 ! FGM Group f6 available
+ fadds %f5, %f6, %f5 ! FGA
+ fmuls %f2, M10, %f4 ! FGM Group f7 available
+ fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
+ fadds %f3, %f10, %f3 ! FGA Group f10 available
+ ld [%o3], %f13 ! LSU
+ fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
+ add %o3, 4, %o3 ! IEU0
+ fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
+
+ /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
+
+ fmuls %f3, %f13, %f3
+ st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
+ fmuls %f5, %f13, %f5
+ st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
+ fmuls %f7, %f13, %f7
+ st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 5b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+7: retl
+ nop
+
+ .globl _mesa_sparc_transform_normalize_normals_no_rot
+_mesa_sparc_transform_normalize_normals_no_rot:
+ /* o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+ sethi %hi(ONE_DOT_ZERO), %g2
+ sub %sp, 16, %sp
+ st %g2, [%sp + STACK_VAR_OFF+0x0]
+ st %o1, [%sp + STACK_VAR_OFF+0x4]
+ ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
+ ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
+ add %sp, 16, %sp
+
+ LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
+ LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
+ ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
+ ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
+ LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
+
+ LDMATRIX_0_5_10(%o0)
+
+ /* dest->count = in->count */
+ st %g1, [%o4 + V4F_COUNT]
+
+ cmp %g1, 1
+ bl 7f
+ cmp %o3, 0
+ bne 4f
+ clr %o4 ! 'i' for STRIDE_LOOP
+
+1: /* LENGTHS == NULL */
+ ld [%o5 + 0x00], %f0 ! ux = from[0]
+ ld [%o5 + 0x04], %f1 ! uy = from[1]
+ ld [%o5 + 0x08], %f2 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ /* tx (f3) = (ux * m0)
+ * ty (f5) = (uy * m5)
+ * tz (f7) = (uz * m10)
+ */
+ fmuls %f0, M0, %f3 ! FGM Group
+ fmuls %f1, M5, %f5 ! FGM Group
+ fmuls %f2, M10, %f7 ! FGM Group
+
+ /* f3=tx, f5=ty, f7=tz */
+
+ /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
+ fmuls %f3, %f3, %f6 ! FGM Group stall, f3 available
+ fmuls %f5, %f5, %f8 ! FGM Group f5 available
+ fmuls %f7, %f7, %f10 ! FGM Group f7 available
+ fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
+ fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
+
+ /* scale (f6) = 1.0 / sqrt(len) */
+ fsqrts %f6, %f6 ! FDIV 20 cycles
+ fdivs %f12, %f6, %f6 ! FDIV 14 cycles
+
+ fmuls %f3, %f6, %f3
+ st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
+ fmuls %f5, %f6, %f5
+ st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
+ fmuls %f7, %f6, %f7
+ st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 1b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+ ba 7f
+ nop
+
+4: /* LENGTHS != NULL */
+ fmuls M0, %f15, M0
+ fmuls M5, %f15, M5
+ fmuls M10, %f15, M10
+
+5:
+ ld [%o5 + 0x00], %f0 ! ux = from[0]
+ ld [%o5 + 0x04], %f1 ! uy = from[1]
+ ld [%o5 + 0x08], %f2 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ /* tx (f3) = (ux * m0)
+ * ty (f5) = (uy * m5)
+ * tz (f7) = (uz * m10)
+ */
+ fmuls %f0, M0, %f3 ! FGM Group
+ ld [%o3], %f13 ! LSU
+ fmuls %f1, M5, %f5 ! FGM Group
+ add %o3, 4, %o3 ! IEU0
+ fmuls %f2, M10, %f7 ! FGM Group
+
+ /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
+
+ fmuls %f3, %f13, %f3
+ st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
+ fmuls %f5, %f13, %f5
+ st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
+ fmuls %f7, %f13, %f7
+ st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 5b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+7: retl
+ nop
+
+ .globl _mesa_sparc_transform_rescale_normals_no_rot
+_mesa_sparc_transform_rescale_normals_no_rot:
+ /* o0=mat o1=scale o2=in o3=lengths o4=dest */
+ sub %sp, 16, %sp
+ st %o1, [%sp + STACK_VAR_OFF+0x0]
+ ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
+ add %sp, 16, %sp
+
+ LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
+ LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
+ ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
+ ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
+ LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
+
+ LDMATRIX_0_5_10(%o0)
+
+ /* dest->count = in->count */
+ st %g1, [%o4 + V4F_COUNT]
+
+ cmp %g1, 1
+ bl 7f
+ clr %o4 ! 'i' for STRIDE_LOOP
+
+ fmuls M0, %f15, M0
+ fmuls M5, %f15, M5
+ fmuls M10, %f15, M10
+
+1: ld [%o5 + 0x00], %f0 ! ux = from[0]
+ ld [%o5 + 0x04], %f1 ! uy = from[1]
+ ld [%o5 + 0x08], %f2 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ /* tx (f3) = (ux * m0)
+ * ty (f5) = (uy * m5)
+ * tz (f7) = (uz * m10)
+ */
+ fmuls %f0, M0, %f3 ! FGM Group
+ st %f3, [%g3 + 0x00] ! LSU
+ fmuls %f1, M5, %f5 ! FGM Group
+ st %f5, [%g3 + 0x04] ! LSU
+ fmuls %f2, M10, %f7 ! FGM Group
+ st %f7, [%g3 + 0x08] ! LSU
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 1b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+7: retl
+ nop
+
+ .globl _mesa_sparc_transform_rescale_normals
+_mesa_sparc_transform_rescale_normals:
+ /* o0=mat o1=scale o2=in o3=lengths o4=dest */
+ sub %sp, 16, %sp
+ st %o1, [%sp + STACK_VAR_OFF+0x0]
+ ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
+ add %sp, 16, %sp
+
+ LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
+ LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
+ ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
+ ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
+ LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
+
+ LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
+
+ /* dest->count = in->count */
+ st %g1, [%o4 + V4F_COUNT]
+
+ cmp %g1, 1
+ bl 7f
+ clr %o4 ! 'i' for STRIDE_LOOP
+
+ fmuls M0, %f15, M0
+ fmuls M1, %f15, M1
+ fmuls M2, %f15, M2
+ fmuls M4, %f15, M4
+ fmuls M5, %f15, M5
+ fmuls M6, %f15, M6
+ fmuls M8, %f15, M8
+ fmuls M9, %f15, M9
+ fmuls M10, %f15, M10
+
+1: ld [%o5 + 0x00], %f0 ! ux = from[0]
+ ld [%o5 + 0x04], %f1 ! uy = from[1]
+ ld [%o5 + 0x08], %f2 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ fmuls %f0, M0, %f3 ! FGM Group
+ fmuls %f1, M1, %f4 ! FGM Group
+ fmuls %f0, M4, %f5 ! FGM Group
+ fmuls %f1, M5, %f6 ! FGM Group
+ fmuls %f0, M8, %f7 ! FGM Group f3 available
+ fmuls %f1, M9, %f8 ! FGM Group f4 available
+ fadds %f3, %f4, %f3 ! FGA
+ fmuls %f2, M2, %f10 ! FGM Group f5 available
+ fmuls %f2, M6, %f0 ! FGM Group f6 available
+ fadds %f5, %f6, %f5 ! FGA
+ fmuls %f2, M10, %f4 ! FGM Group f7 available
+ fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
+ fadds %f3, %f10, %f3 ! FGA Group f10 available
+ st %f3, [%g3 + 0x00] ! LSU
+ fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
+ st %f5, [%g3 + 0x04] ! LSU
+ fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
+ st %f7, [%g3 + 0x08] ! LSU
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 1b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+7: retl
+ nop
+
+ .globl _mesa_sparc_transform_normals_no_rot
+_mesa_sparc_transform_normals_no_rot:
+ /* o0=mat o1=scale o2=in o3=lengths o4=dest */
+ LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
+ LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
+ ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
+ ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
+ LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
+
+ LDMATRIX_0_5_10(%o0)
+
+ /* dest->count = in->count */
+ st %g1, [%o4 + V4F_COUNT]
+
+ cmp %g1, 1
+ bl 7f
+ clr %o4 ! 'i' for STRIDE_LOOP
+
+1: ld [%o5 + 0x00], %f0 ! ux = from[0]
+ ld [%o5 + 0x04], %f1 ! uy = from[1]
+ ld [%o5 + 0x08], %f2 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ /* tx (f3) = (ux * m0)
+ * ty (f5) = (uy * m5)
+ * tz (f7) = (uz * m10)
+ */
+ fmuls %f0, M0, %f3 ! FGM Group
+ st %f3, [%g3 + 0x00] ! LSU
+ fmuls %f1, M5, %f5 ! FGM Group
+ st %f5, [%g3 + 0x04] ! LSU
+ fmuls %f2, M10, %f7 ! FGM Group
+ st %f7, [%g3 + 0x08] ! LSU
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 1b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+7: retl
+ nop
+
+ .globl _mesa_sparc_transform_normals
+_mesa_sparc_transform_normals:
+ /* o0=mat o1=scale o2=in o3=lengths o4=dest */
+ LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
+ LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
+ ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
+ ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
+ LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
+
+ LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
+
+ /* dest->count = in->count */
+ st %g1, [%o4 + V4F_COUNT]
+
+ cmp %g1, 1
+ bl 7f
+ clr %o4 ! 'i' for STRIDE_LOOP
+
+1: ld [%o5 + 0x00], %f0 ! ux = from[0]
+ ld [%o5 + 0x04], %f1 ! uy = from[1]
+ ld [%o5 + 0x08], %f2 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ fmuls %f0, M0, %f3 ! FGM Group
+ fmuls %f1, M1, %f4 ! FGM Group
+ fmuls %f0, M4, %f5 ! FGM Group
+ fmuls %f1, M5, %f6 ! FGM Group
+ fmuls %f0, M8, %f7 ! FGM Group f3 available
+ fmuls %f1, M9, %f8 ! FGM Group f4 available
+ fadds %f3, %f4, %f3 ! FGA
+ fmuls %f2, M2, %f10 ! FGM Group f5 available
+ fmuls %f2, M6, %f0 ! FGM Group f6 available
+ fadds %f5, %f6, %f5 ! FGA
+ fmuls %f2, M10, %f4 ! FGM Group f7 available
+ fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
+ fadds %f3, %f10, %f3 ! FGA Group f10 available
+ st %f3, [%g3 + 0x00] ! LSU
+ fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
+ st %f5, [%g3 + 0x04] ! LSU
+ fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
+ st %f7, [%g3 + 0x08] ! LSU
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 1b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+7: retl
+ nop
+
+ .globl _mesa_sparc_normalize_normals
+_mesa_sparc_normalize_normals:
+ /* o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+ sethi %hi(ONE_DOT_ZERO), %g2
+ sub %sp, 16, %sp
+ st %g2, [%sp + STACK_VAR_OFF+0x0]
+ ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
+ add %sp, 16, %sp
+
+ LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
+ ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
+ ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
+ LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
+
+ /* dest->count = in->count */
+ st %g1, [%o4 + V4F_COUNT]
+
+ cmp %g1, 1
+ bl 7f
+ cmp %o3, 0
+ bne 4f
+ clr %o4 ! 'i' for STRIDE_LOOP
+
+1: /* LENGTHS == NULL */
+ ld [%o5 + 0x00], %f3 ! ux = from[0]
+ ld [%o5 + 0x04], %f5 ! uy = from[1]
+ ld [%o5 + 0x08], %f7 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ /* f3=tx, f5=ty, f7=tz */
+
+ /* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
+ fmuls %f3, %f3, %f6 ! FGM Group f3 available
+ fmuls %f5, %f5, %f8 ! FGM Group f5 available
+ fmuls %f7, %f7, %f10 ! FGM Group f7 available
+ fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
+ fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
+
+ /* scale (f6) = 1.0 / sqrt(len) */
+ fsqrts %f6, %f6 ! FDIV 20 cycles
+ fdivs %f12, %f6, %f6 ! FDIV 14 cycles
+
+ fmuls %f3, %f6, %f3
+ st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
+ fmuls %f5, %f6, %f5
+ st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
+ fmuls %f7, %f6, %f7
+ st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 1b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+ ba 7f
+ nop
+
+4: /* LENGTHS != NULL */
+
+5:
+ ld [%o5 + 0x00], %f3 ! ux = from[0]
+ ld [%o5 + 0x04], %f5 ! uy = from[1]
+ ld [%o5 + 0x08], %f7 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ ld [%o3], %f13 ! LSU
+ add %o3, 4, %o3 ! IEU0
+
+ /* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
+
+ fmuls %f3, %f13, %f3
+ st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
+ fmuls %f5, %f13, %f5
+ st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
+ fmuls %f7, %f13, %f7
+ st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 5b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+7: retl
+ nop
+
+ .globl _mesa_sparc_rescale_normals
+_mesa_sparc_rescale_normals:
+ /* o0=mat o1=scale o2=in o3=lengths o4=dest */
+
+ sethi %hi(ONE_DOT_ZERO), %g2
+ sub %sp, 16, %sp
+ st %o1, [%sp + STACK_VAR_OFF+0x0]
+ ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
+ add %sp, 16, %sp
+
+ LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
+ ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
+ ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
+ LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
+
+ /* dest->count = in->count */
+ st %g1, [%o4 + V4F_COUNT]
+
+ cmp %g1, 1
+ bl 7f
+ clr %o4 ! 'i' for STRIDE_LOOP
+
+1:
+ ld [%o5 + 0x00], %f3 ! ux = from[0]
+ ld [%o5 + 0x04], %f5 ! uy = from[1]
+ ld [%o5 + 0x08], %f7 ! uz = from[2]
+ add %o5, %g2, %o5 ! STRIDE_F(from, stride)
+ add %o4, 1, %o4 ! i++
+
+ /* f3=tx, f5=ty, f7=tz */
+
+ fmuls %f3, %f15, %f3
+ st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
+ fmuls %f5, %f15, %f5
+ st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
+ fmuls %f7, %f15, %f7
+ st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
+
+ cmp %o4, %g1 ! continue if (i < count)
+ bl 1b
+ add %g3, 0x10, %g3 ! advance out vector pointer
+
+7: retl
+ nop
diff --git a/src/mesa/sparc/sparc.c b/src/mesa/sparc/sparc.c
new file mode 100644
index 0000000000..cea0c7cecf
--- /dev/null
+++ b/src/mesa/sparc/sparc.c
@@ -0,0 +1,142 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 6.3
+ *
+ * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Sparc assembly code by David S. Miller
+ */
+
+
+#include "sparc.h"
+
+#ifdef USE_SPARC_ASM
+
+#include "main/context.h"
+#include "math/m_xform.h"
+#include "tnl/t_context.h"
+
+#ifdef DEBUG
+#include "math/m_debug.h"
+#endif
+
+#define XFORM_ARGS GLvector4f *to_vec, \
+ const GLfloat m[16], \
+ const GLvector4f *from_vec
+
+#define DECLARE_XFORM_GROUP(pfx, sz) \
+ extern void _mesa_##pfx##_transform_points##sz##_general(XFORM_ARGS); \
+ extern void _mesa_##pfx##_transform_points##sz##_identity(XFORM_ARGS); \
+ extern void _mesa_##pfx##_transform_points##sz##_3d_no_rot(XFORM_ARGS); \
+ extern void _mesa_##pfx##_transform_points##sz##_perspective(XFORM_ARGS); \
+ extern void _mesa_##pfx##_transform_points##sz##_2d(XFORM_ARGS); \
+ extern void _mesa_##pfx##_transform_points##sz##_2d_no_rot(XFORM_ARGS); \
+ extern void _mesa_##pfx##_transform_points##sz##_3d(XFORM_ARGS);
+
+#define ASSIGN_XFORM_GROUP(pfx, sz) \
+ _mesa_transform_tab[sz][MATRIX_GENERAL] = \
+ _mesa_##pfx##_transform_points##sz##_general; \
+ _mesa_transform_tab[sz][MATRIX_IDENTITY] = \
+ _mesa_##pfx##_transform_points##sz##_identity; \
+ _mesa_transform_tab[sz][MATRIX_3D_NO_ROT] = \
+ _mesa_##pfx##_transform_points##sz##_3d_no_rot; \
+ _mesa_transform_tab[sz][MATRIX_PERSPECTIVE] = \
+ _mesa_##pfx##_transform_points##sz##_perspective; \
+ _mesa_transform_tab[sz][MATRIX_2D] = \
+ _mesa_##pfx##_transform_points##sz##_2d; \
+ _mesa_transform_tab[sz][MATRIX_2D_NO_ROT] = \
+ _mesa_##pfx##_transform_points##sz##_2d_no_rot; \
+ _mesa_transform_tab[sz][MATRIX_3D] = \
+ _mesa_##pfx##_transform_points##sz##_3d;
+
+
+DECLARE_XFORM_GROUP(sparc, 1)
+DECLARE_XFORM_GROUP(sparc, 2)
+DECLARE_XFORM_GROUP(sparc, 3)
+DECLARE_XFORM_GROUP(sparc, 4)
+
+extern GLvector4f *_mesa_sparc_cliptest_points4(GLvector4f *clip_vec,
+ GLvector4f *proj_vec,
+ GLubyte clipMask[],
+ GLubyte *orMask,
+ GLubyte *andMask,
+ GLboolean viewport_z_clip);
+
+extern GLvector4f *_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
+ GLvector4f *proj_vec,
+ GLubyte clipMask[],
+ GLubyte *orMask,
+ GLubyte *andMask,
+ GLboolean viewport_z_clip);
+
+#define NORM_ARGS const GLmatrix *mat, \
+ GLfloat scale, \
+ const GLvector4f *in, \
+ const GLfloat *lengths, \
+ GLvector4f *dest
+
+extern void _mesa_sparc_transform_normalize_normals(NORM_ARGS);
+extern void _mesa_sparc_transform_normalize_normals_no_rot(NORM_ARGS);
+extern void _mesa_sparc_transform_rescale_normals_no_rot(NORM_ARGS);
+extern void _mesa_sparc_transform_rescale_normals(NORM_ARGS);
+extern void _mesa_sparc_transform_normals_no_rot(NORM_ARGS);
+extern void _mesa_sparc_transform_normals(NORM_ARGS);
+extern void _mesa_sparc_normalize_normals(NORM_ARGS);
+extern void _mesa_sparc_rescale_normals(NORM_ARGS);
+
+
+
+void _mesa_init_all_sparc_transform_asm(void)
+{
+ ASSIGN_XFORM_GROUP(sparc, 1)
+ ASSIGN_XFORM_GROUP(sparc, 2)
+ ASSIGN_XFORM_GROUP(sparc, 3)
+ ASSIGN_XFORM_GROUP(sparc, 4)
+
+ _mesa_clip_tab[4] = _mesa_sparc_cliptest_points4;
+ _mesa_clip_np_tab[4] = _mesa_sparc_cliptest_points4_np;
+
+ _mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] =
+ _mesa_sparc_transform_normalize_normals;
+ _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] =
+ _mesa_sparc_transform_normalize_normals_no_rot;
+ _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =
+ _mesa_sparc_transform_rescale_normals_no_rot;
+ _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =
+ _mesa_sparc_transform_rescale_normals;
+ _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
+ _mesa_sparc_transform_normals_no_rot;
+ _mesa_normal_tab[NORM_TRANSFORM] =
+ _mesa_sparc_transform_normals;
+ _mesa_normal_tab[NORM_NORMALIZE] =
+ _mesa_sparc_normalize_normals;
+ _mesa_normal_tab[NORM_RESCALE] =
+ _mesa_sparc_rescale_normals;
+
+#ifdef DEBUG_MATH
+ _math_test_all_transform_functions("sparc");
+ _math_test_all_cliptest_functions("sparc");
+ _math_test_all_normal_transform_functions("sparc");
+#endif
+}
+
+#endif /* USE_SPARC_ASM */
diff --git a/src/mesa/sparc/sparc.h b/src/mesa/sparc/sparc.h
new file mode 100644
index 0000000000..b9ea336856
--- /dev/null
+++ b/src/mesa/sparc/sparc.h
@@ -0,0 +1,36 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.1
+ *
+ * Copyright (C) 1999 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Sparc assembly code by David S. Miller
+ */
+
+
+#ifndef SPARC_H
+#define SPARC_H
+
+extern void _mesa_init_all_sparc_transform_asm(void);
+
+#endif /* !(SPARC_H) */
diff --git a/src/mesa/sparc/sparc_matrix.h b/src/mesa/sparc/sparc_matrix.h
new file mode 100644
index 0000000000..f677d9bda1
--- /dev/null
+++ b/src/mesa/sparc/sparc_matrix.h
@@ -0,0 +1,170 @@
+/*
+ * SPARC assembly matrix code.
+ */
+
+#ifndef _SPARC_MATRIX_H
+#define _SPARC_MATRIX_H
+
+#ifdef __arch64__
+#define LDPTR ldx
+#define MAT_M 0x00
+#define MAT_INV 0x08
+#define V4F_DATA 0x00
+#define V4F_START 0x08
+#define V4F_COUNT 0x10
+#define V4F_STRIDE 0x14
+#define V4F_SIZE 0x18
+#define V4F_FLAGS 0x1c
+#else
+#define LDPTR ld
+#define MAT_M 0x00
+#define MAT_INV 0x04
+#define V4F_DATA 0x00
+#define V4F_START 0x04
+#define V4F_COUNT 0x08
+#define V4F_STRIDE 0x0c
+#define V4F_SIZE 0x10
+#define V4F_FLAGS 0x14
+#endif
+
+#define VEC_SIZE_1 1
+#define VEC_SIZE_2 3
+#define VEC_SIZE_3 7
+#define VEC_SIZE_4 15
+
+#define M0 %f16
+#define M1 %f17
+#define M2 %f18
+#define M3 %f19
+#define M4 %f20
+#define M5 %f21
+#define M6 %f22
+#define M7 %f23
+#define M8 %f24
+#define M9 %f25
+#define M10 %f26
+#define M11 %f27
+#define M12 %f28
+#define M13 %f29
+#define M14 %f30
+#define M15 %f31
+
+#define LDMATRIX_0_1_2_3_12_13_14_15(BASE) \
+ ldd [BASE + ( 0 * 0x4)], M0; \
+ ldd [BASE + ( 2 * 0x4)], M2; \
+ ldd [BASE + (12 * 0x4)], M12; \
+ ldd [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_12_13(BASE) \
+ ldd [BASE + ( 0 * 0x4)], M0; \
+ ldd [BASE + (12 * 0x4)], M12
+
+#define LDMATRIX_0_12_13(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ldd [BASE + (12 * 0x4)], M12
+
+#define LDMATRIX_0_1_2_12_13_14(BASE) \
+ ldd [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 2 * 0x4)], M2; \
+ ldd [BASE + (12 * 0x4)], M12; \
+ ld [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_12_13_14(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ldd [BASE + (12 * 0x4)], M12; \
+ ld [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_14(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE) \
+ ldd [BASE + ( 0 * 0x4)], M0; \
+ ldd [BASE + ( 2 * 0x4)], M2; \
+ ldd [BASE + ( 4 * 0x4)], M4; \
+ ldd [BASE + ( 6 * 0x4)], M6; \
+ ldd [BASE + (12 * 0x4)], M12; \
+ ldd [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_12_13(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 5 * 0x4)], M5; \
+ ldd [BASE + (12 * 0x4)], M12
+
+#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE) \
+ ldd [BASE + ( 0 * 0x4)], M0; \
+ ldd [BASE + ( 2 * 0x4)], M2; \
+ ldd [BASE + ( 4 * 0x4)], M4; \
+ ld [BASE + ( 6 * 0x4)], M6; \
+ ldd [BASE + (12 * 0x4)], M12; \
+ ld [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_12_13_14(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 5 * 0x4)], M5; \
+ ldd [BASE + (12 * 0x4)], M12; \
+ ld [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_14(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 5 * 0x4)], M5; \
+ ld [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \
+ ldd [BASE + ( 0 * 0x4)], M0; \
+ ldd [BASE + ( 2 * 0x4)], M2; \
+ ldd [BASE + ( 4 * 0x4)], M4; \
+ ldd [BASE + ( 6 * 0x4)], M6; \
+ ldd [BASE + ( 8 * 0x4)], M8; \
+ ldd [BASE + (10 * 0x4)], M10; \
+ ldd [BASE + (12 * 0x4)], M12; \
+ ldd [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_4_5_12_13(BASE) \
+ ldd [BASE + ( 0 * 0x4)], M0; \
+ ldd [BASE + ( 4 * 0x4)], M4; \
+ ldd [BASE + (12 * 0x4)], M12
+
+#define LDMATRIX_0_5_12_13(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 5 * 0x4)], M5; \
+ ldd [BASE + (12 * 0x4)], M12
+
+#define LDMATRIX_0_1_2_4_5_6_8_9_10(BASE) \
+ ldd [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 2 * 0x4)], M2; \
+ ldd [BASE + ( 4 * 0x4)], M4; \
+ ld [BASE + ( 6 * 0x4)], M6; \
+ ldd [BASE + ( 8 * 0x4)], M8; \
+ ld [BASE + (10 * 0x4)], M10
+
+#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE) \
+ ldd [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 2 * 0x4)], M2; \
+ ldd [BASE + ( 4 * 0x4)], M4; \
+ ld [BASE + ( 6 * 0x4)], M6; \
+ ldd [BASE + ( 8 * 0x4)], M8; \
+ ld [BASE + (10 * 0x4)], M10; \
+ ldd [BASE + (12 * 0x4)], M12; \
+ ld [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_10(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 5 * 0x4)], M5; \
+ ld [BASE + (10 * 0x4)], M10; \
+
+#define LDMATRIX_0_5_10_12_13_14(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 5 * 0x4)], M5; \
+ ld [BASE + (10 * 0x4)], M10; \
+ ldd [BASE + (12 * 0x4)], M12; \
+ ld [BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_8_9_10_14(BASE) \
+ ld [BASE + ( 0 * 0x4)], M0; \
+ ld [BASE + ( 5 * 0x4)], M5; \
+ ldd [BASE + ( 8 * 0x4)], M8; \
+ ld [BASE + (10 * 0x4)], M10; \
+ ld [BASE + (14 * 0x4)], M14
+
+#endif /* !(_SPARC_MATRIX_H) */
diff --git a/src/mesa/sparc/xform.S b/src/mesa/sparc/xform.S
new file mode 100644
index 0000000000..2a7cce41e5
--- /dev/null
+++ b/src/mesa/sparc/xform.S
@@ -0,0 +1,1392 @@
+
+ /* TODO
+ *
+ * 1) It would be nice if load/store double could be used
+ * at least for the matrix parts. I think for the matrices
+ * it is safe, but for the vertices it probably is not due to
+ * things like glInterleavedArrays etc.
+ *
+ * UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
+ *
+ * 2) One extremely slick trick would be if we could enclose
+ * groups of xform calls on the same vertices such that
+ * we just load the matrix into f16-->f31 before the calls
+ * and then we would not have to do them here. This may be
+ * tricky and not much of a gain though.
+ */
+
+#include "sparc_matrix.h"
+
+#if defined(SVR4) || defined(__SVR4) || defined(__svr4__) || defined(__arch64__)
+ /* Solaris requires this for 64-bit. */
+ .register %g2, #scratch
+ .register %g3, #scratch
+#endif
+
+ .text
+ .align 64
+
+__set_v4f_1:
+ ld [%o0 + V4F_FLAGS], %g2
+ mov 1, %g1
+ st %g1, [%o0 + V4F_SIZE]
+ or %g2, VEC_SIZE_1, %g2
+ retl
+ st %g2, [%o0 + V4F_FLAGS]
+__set_v4f_2:
+ ld [%o0 + V4F_FLAGS], %g2
+ mov 2, %g1
+ st %g1, [%o0 + V4F_SIZE]
+ or %g2, VEC_SIZE_2, %g2
+ retl
+ st %g2, [%o0 + V4F_FLAGS]
+__set_v4f_3:
+ ld [%o0 + V4F_FLAGS], %g2
+ mov 3, %g1
+ st %g1, [%o0 + V4F_SIZE]
+ or %g2, VEC_SIZE_3, %g2
+ retl
+ st %g2, [%o0 + V4F_FLAGS]
+__set_v4f_4:
+ ld [%o0 + V4F_FLAGS], %g2
+ mov 4, %g1
+ st %g1, [%o0 + V4F_SIZE]
+ or %g2, VEC_SIZE_4, %g2
+ retl
+ st %g2, [%o0 + V4F_FLAGS]
+
+ /* First the raw versions. */
+
+ .globl _mesa_sparc_transform_points1_general
+_mesa_sparc_transform_points1_general:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_2_3_12_13_14_15(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ add %g1, %o5, %g1 ! IEU0
+ ld [%g1 + 0x00], %f8 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
+ fmuls %f0, M1, %f2 ! FGM Group
+ fmuls %f0, M2, %f3 ! FGM Group
+ fmuls %f0, M3, %f4 ! FGM Group
+ fmuls %f8, M0, %f9 ! FGM Group f1 available
+ fadds %f1, M12, %f1 ! FGA
+ st %f1, [%g2 + 0x00] ! LSU
+ fmuls %f8, M1, %f10 ! FGM Group f2 available
+ fadds %f2, M13, %f2 ! FGA
+ st %f2, [%g2 + 0x04] ! LSU
+ fmuls %f8, M2, %f11 ! FGM Group f3 available
+ fadds %f3, M14, %f3 ! FGA
+ st %f3, [%g2 + 0x08] ! LSU
+ fmuls %f8, M3, %f12 ! FGM Group f4 available
+ fadds %f4, M15, %f4 ! FGA
+ st %f4, [%g2 + 0x0c] ! LSU
+ fadds %f9, M12, %f9 ! FGA Group f9 available
+ st %f9, [%g2 + 0x10] ! LSU
+ fadds %f10, M13, %f10 ! FGA Group f10 available
+ st %f10, [%g2 + 0x14] ! LSU
+ fadds %f11, M14, %f11 ! FGA Group f11 available
+ st %f11, [%g2 + 0x18] ! LSU
+ fadds %f12, M15, %f12 ! FGA Group f12 available
+ st %f12, [%g2 + 0x1c] ! LSU
+ cmp %o1, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0 ! LSU Group
+ fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
+ fmuls %f0, M1, %f2 ! FGM Group
+ fmuls %f0, M2, %f3 ! FGM Group
+ fmuls %f0, M3, %f4 ! FGM Group
+ fadds %f1, M12, %f1 ! FGA Group
+ st %f1, [%g2 + 0x00] ! LSU
+ fadds %f2, M13, %f2 ! FGA Group
+ st %f2, [%g2 + 0x04] ! LSU
+ fadds %f3, M14, %f3 ! FGA Group
+ st %f3, [%g2 + 0x08] ! LSU
+ fadds %f4, M15, %f4 ! FGA Group
+ st %f4, [%g2 + 0x0c] ! LSU
+
+3:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points1_identity
+_mesa_sparc_transform_points1_identity:
+ cmp %o0, %o2
+ be 4f
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ add %g1, %o5, %g1 ! IEU0
+ ld [%g1 + 0x00], %f1 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ st %f0, [%g2 + 0x00] ! LSU Group
+ cmp %o1, %o2 ! IEU1
+ st %f1, [%g2 + 0x10] ! LSU Group
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0
+ addx %g0, %g0, %g0
+ st %f0, [%g2 + 0x00]
+
+3:
+ ba __set_v4f_1
+ nop
+
+4: retl
+ nop
+
+ .globl _mesa_sparc_transform_points1_2d
+_mesa_sparc_transform_points1_2d:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_12_13(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ add %g1, %o5, %g1 ! IEU0
+ ld [%g1 + 0x00], %f8 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f1 ! FGM Group
+ fmuls %f0, M1, %f2 ! FGM Group
+ fmuls %f8, M0, %f9 ! FGM Group
+ fmuls %f8, M1, %f10 ! FGM Group
+ fadds %f1, M12, %f3 ! FGA Group f1 available
+ st %f3, [%g2 + 0x00] ! LSU
+ fadds %f2, M13, %f4 ! FGA Group f2 available
+ st %f4, [%g2 + 0x04] ! LSU
+ fadds %f9, M12, %f11 ! FGA Group f9 available
+ st %f11, [%g2 + 0x10] ! LSU
+ fadds %f10, M13, %f12 ! FGA Group f10 available
+ st %f12, [%g2 + 0x14] ! LSU
+ cmp %o1, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0
+ fmuls %f0, M0, %f1
+ fmuls %f0, M1, %f2
+ fadds %f1, M12, %f3
+ st %f3, [%g2 + 0x00]
+ fadds %f2, M13, %f4
+ st %f4, [%g2 + 0x04]
+
+3:
+ ba __set_v4f_2
+ nop
+
+ .globl _mesa_sparc_transform_points1_2d_no_rot
+_mesa_sparc_transform_points1_2d_no_rot:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_12_13(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ add %g1, %o5, %g1 ! IEU0
+ ld [%g1 + 0x00], %f4 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f1 ! FGM Group
+ fmuls %f4, M0, %f5 ! FGM Group
+ fadds %f1, M12, %f3 ! FGA Group, 2 cycle stall, f1 available
+ st %f3, [%g2 + 0x00] ! LSU
+ st M13, [%g2 + 0x04] ! LSU Group, f5 available
+ fadds %f5, M12, %f6 ! FGA
+ st %f6, [%g2 + 0x10] ! LSU Group
+ st M13, [%g2 + 0x14] ! LSU Group
+ cmp %o1, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0
+ fmuls %f0, M0, %f1
+ fadds %f1, M12, %f3
+ st %f3, [%g2 + 0x00]
+ st M13, [%g2 + 0x04]
+
+3:
+ ba __set_v4f_2
+ nop
+
+ .globl _mesa_sparc_transform_points1_3d
+_mesa_sparc_transform_points1_3d:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_2_12_13_14(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ add %g1, %o5, %g1 ! IEU0
+ ld [%g1 + 0x00], %f4 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f1 ! FGM Group
+ fmuls %f0, M1, %f2 ! FGM Group
+ fmuls %f0, M2, %f3 ! FGM Group
+ fmuls %f4, M0, %f5 ! FGM Group
+ fadds %f1, M12, %f1 ! FGA Group, f1 available
+ st %f1, [%g2 + 0x00] ! LSU
+ fmuls %f4, M1, %f6 ! FGM
+ fadds %f2, M13, %f2 ! FGA Group, f2 available
+ st %f2, [%g2 + 0x04] ! LSU
+ fmuls %f4, M2, %f7 ! FGM
+ fadds %f3, M14, %f3 ! FGA Group, f3 available
+ st %f3, [%g2 + 0x08] ! LSU
+ fadds %f5, M12, %f5 ! FGA Group, f5 available
+ st %f5, [%g2 + 0x10] ! LSU
+ fadds %f6, M13, %f6 ! FGA Group, f6 available
+ st %f6, [%g2 + 0x14] ! LSU
+ fadds %f7, M14, %f7 ! FGA Group, f7 available
+ st %f7, [%g2 + 0x18] ! LSU
+ cmp %o1, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0
+ fmuls %f0, M0, %f1
+ fmuls %f0, M1, %f2
+ fmuls %f0, M2, %f3
+ fadds %f1, M12, %f1
+ st %f1, [%g2 + 0x00]
+ fadds %f2, M13, %f2
+ st %f2, [%g2 + 0x04]
+ fadds %f3, M14, %f3
+ st %f3, [%g2 + 0x08]
+
+3:
+ ba __set_v4f_3
+ nop
+
+ .globl _mesa_sparc_transform_points1_3d_no_rot
+_mesa_sparc_transform_points1_3d_no_rot:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_12_13_14(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ add %g1, %o5, %g1 ! IEU0
+ ld [%g1 + 0x00], %f2 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f1 ! FGM Group
+ fmuls %f2, M0, %f3 ! FGM Group
+ fadds %f1, M12, %f1 ! FGA Group, 2 cycle stall, f1 available
+ st %f1, [%g2 + 0x00] ! LSU
+ fadds %f3, M12, %f3 ! FGA Group, f3 available
+ st M13, [%g2 + 0x04] ! LSU
+ st M14, [%g2 + 0x08] ! LSU Group
+ st %f3, [%g2 + 0x10] ! LSU Group
+ st M13, [%g2 + 0x14] ! LSU Group
+ st M14, [%g2 + 0x18] ! LSU Group
+ cmp %o1, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0
+ fmuls %f0, M0, %f1
+ fadds %f1, M12, %f1
+ st %f1, [%g2 + 0x00]
+ st M13, [%g2 + 0x04]
+ st M14, [%g2 + 0x08]
+
+3:
+ ba __set_v4f_3
+ nop
+
+ .globl _mesa_sparc_transform_points1_perspective
+_mesa_sparc_transform_points1_perspective:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_14(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ add %g1, %o5, %g1 ! IEU0
+ ld [%g1 + 0x00], %f2 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f1 ! FGM Group
+ st %f1, [%g2 + 0x00] ! LSU
+ fmuls %f2, M0, %f3 ! FGM Group
+ st %g0, [%g2 + 0x04] ! LSU
+ st M14, [%g2 + 0x08] ! LSU Group
+ st %g0, [%g2 + 0x0c] ! LSU Group
+ st %f3, [%g2 + 0x10] ! LSU Group
+ st %g0, [%g2 + 0x14] ! LSU Group
+ st M14, [%g2 + 0x18] ! LSU Group
+ st %g0, [%g2 + 0x1c] ! LSU Group
+ cmp %o1, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0
+ fmuls %f0, M0, %f1
+ st %f1, [%g2 + 0x00]
+ st %g0, [%g2 + 0x04]
+ st M14, [%g2 + 0x08]
+ st %g0, [%g2 + 0x0c]
+
+3:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points2_general
+_mesa_sparc_transform_points2_general:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f2 ! FGM Group
+ fmuls %f0, M1, %f3 ! FGM Group
+ fmuls %f0, M2, %f4 ! FGM Group
+ fmuls %f0, M3, %f5 ! FGM Group
+ fadds %f2, M12, %f2 ! FGA Group f2 available
+ fmuls %f1, M4, %f6 ! FGM
+ fadds %f3, M13, %f3 ! FGA Group f3 available
+ fmuls %f1, M5, %f7 ! FGM
+ fadds %f4, M14, %f4 ! FGA Group f4 available
+ fmuls %f1, M6, %f8 ! FGM
+ fadds %f5, M15, %f5 ! FGA Group f5 available
+ fmuls %f1, M7, %f9 ! FGM
+ fadds %f2, %f6, %f2 ! FGA Group f6 available
+ st %f2, [%g2 + 0x00] ! LSU
+ fadds %f3, %f7, %f3 ! FGA Group f7 available
+ st %f3, [%g2 + 0x04] ! LSU
+ fadds %f4, %f8, %f4 ! FGA Group f8 available
+ st %f4, [%g2 + 0x08] ! LSU
+ fadds %f5, %f9, %f5 ! FGA Group f9 available
+ st %f5, [%g2 + 0x0c] ! LSU
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points2_identity
+_mesa_sparc_transform_points2_identity:
+ cmp %o2, %o0
+ be 3f
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ add %g1, %o5, %g1 ! IEU0
+ cmp %o1, %g3 ! IEU1
+ st %f0, [%g2 + 0x00] ! LSU Group
+ st %f1, [%g2 + 0x04] ! LSU Group
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0
+2:
+ ba __set_v4f_2
+ nop
+
+3: retl
+ nop
+
+ .globl _mesa_sparc_transform_points2_2d
+_mesa_sparc_transform_points2_2d:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_4_5_12_13(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f2 ! FGM
+ ld [%g1 + 0x00], %f8 ! LSU Group
+ fmuls %f0, M1, %f3 ! FGM
+ ld [%g1 + 0x04], %f9 ! LSU Group
+ fmuls %f1, M4, %f6 ! FGM
+ fmuls %f1, M5, %f7 ! FGM Group
+ add %g1, %o5, %g1 ! IEU0
+ fmuls %f8, M0, %f10 ! FGM Group f2 available
+ fadds %f2, M12, %f2 ! FGA
+ fmuls %f8, M1, %f11 ! FGM Group f3 available
+ fadds %f3, M13, %f3 ! FGA
+ fmuls %f9, M4, %f12 ! FGM Group
+ fmuls %f9, M5, %f13 ! FGM Group
+ fadds %f10, M12, %f10 ! FGA Group f2, f10 available
+ fadds %f2, %f6, %f2 ! FGA Group f3, f11 available
+ st %f2, [%g2 + 0x00] ! LSU
+ fadds %f11, M13, %f11 ! FGA Group f12 available
+ fadds %f3, %f7, %f3 ! FGA Group f13 available
+ st %f3, [%g2 + 0x04] ! LSU
+ fadds %f10, %f12, %f10 ! FGA Group f10 available
+ st %f10, [%g2 + 0x10] ! LSU
+ fadds %f11, %f13, %f11 ! FGA Group f11 available
+ st %f11, [%g2 + 0x14] ! LSU
+ cmp %o1, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ fmuls %f0, M0, %f2 ! FGM Group
+ fmuls %f0, M1, %f3 ! FGM Group
+ fmuls %f1, M4, %f6 ! FGM Group
+ fmuls %f1, M5, %f7 ! FGM Group
+ fadds %f2, M12, %f2 ! FGA Group f2 available
+ fadds %f3, M13, %f3 ! FGA Group f3 available
+ fadds %f2, %f6, %f2 ! FGA Group 2 cycle stall, f2 available
+ st %f2, [%g2 + 0x00] ! LSU
+ fadds %f3, %f7, %f3 ! FGA Group f3 available
+ st %f3, [%g2 + 0x04] ! LSU
+
+3:
+ ba __set_v4f_2
+ nop
+
+ .globl _mesa_sparc_transform_points2_2d_no_rot
+_mesa_sparc_transform_points2_2d_no_rot:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_5_12_13(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ ld [%g1 + 0x00], %f4 ! LSU Group
+ fmuls %f0, M0, %f2 ! FGM
+ ld [%g1 + 0x04], %f5 ! LSU Group
+ fmuls %f1, M5, %f3 ! FGM
+ fmuls %f4, M0, %f6 ! FGM Group
+ add %g1, %o5, %g1 ! IEU0
+ fmuls %f5, M5, %f7 ! FGM Group
+ fadds %f2, M12, %f2 ! FGA Group f2 available
+ st %f2, [%g2 + 0x00] ! LSU
+ fadds %f3, M13, %f3 ! FGA Group f3 available
+ st %f3, [%g2 + 0x04] ! LSU
+ fadds %f6, M12, %f6 ! FGA Group f6 available
+ st %f6, [%g2 + 0x10] ! LSU
+ fadds %f7, M13, %f7 ! FGA Group f7 available
+ st %f7, [%g2 + 0x14] ! LSU
+ cmp %o1, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ fmuls %f0, M0, %f2 ! FGM Group
+ fmuls %f1, M5, %f3 ! FGM Group
+ fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
+ st %f2, [%g2 + 0x00] ! LSU
+ fadds %f3, M13, %f3 ! FGA Group f3 available
+ st %f3, [%g2 + 0x04] ! LSU
+
+3:
+ ba __set_v4f_2
+ nop
+
+ /* orig: 12 cycles */
+ .globl _mesa_sparc_transform_points2_3d
+_mesa_sparc_transform_points2_3d:
+ ld [%o2 + V4F_STRIDE], %o5
+ ld [%o2 + V4F_START], %g1
+ ld [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o1
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ add %o1, 2, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ ld [%g1 + 0x00], %f9 ! LSU Group
+ fmuls %f0, M0, %f2 ! FGM
+ ld [%g1 + 0x04], %f10 ! LSU Group
+ fmuls %f0, M1, %f3 ! FGM
+ fmuls %f0, M2, %f4 ! FGM Group
+ add %g1, %o5, %g1 ! IEU0
+ fmuls %f1, M4, %f6 ! FGM Group
+ fmuls %f1, M5, %f7 ! FGM Group f2 available
+ fadds %f2, M12, %f2 ! FGA
+ fmuls %f1, M6, %f8 ! FGM Group f3 available
+ fadds %f3, M13, %f3 ! FGA
+ fmuls %f9, M0, %f11 ! FGM Group f4 available
+ fadds %f4, M14, %f4 ! FGA
+ fmuls %f9, M1, %f12 ! FGM Group f6 available
+ fmuls %f9, M2, %f13 ! FGM Group f2, f7 available
+ fadds %f2, %f6, %f2 ! FGA
+ st %f2, [%g2 + 0x00] ! LSU
+ fmuls %f10, M4, %f14 ! FGM Group f3, f8 available
+ fadds %f3, %f7, %f3 ! FGA
+ st %f3, [%g2 + 0x04] ! LSU
+ fmuls %f10, M5, %f15 ! FGM Group f4, f11 available
+ fadds %f11, M12, %f11 ! FGA
+ fmuls %f10, M6, %f0 ! FGM Group f12 available
+ fadds %f12, M13, %f12 ! FGA
+ fadds %f13, M14, %f13 ! FGA Group f13 available
+ fadds %f4, %f8, %f4 ! FGA Group f14 available
+ st %f4, [%g2 + 0x08] ! LSU
+ fadds %f11, %f14, %f11 ! FGA Group f15, f11 available
+ st %f11, [%g2 + 0x10] ! LSU
+ fadds %f12, %f15, %f12 ! FGA Group f0, f12 available
+ st %f12, [%g2 + 0x14] ! LSU
+ fadds %f13, %f0, %f13 ! FGA Group f13 available
+ st %f13, [%g2 + 0x18] ! LSU
+
+ cmp %o1, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o1, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ fmuls %f0, M0, %f2 ! FGM Group
+ fmuls %f0, M1, %f3 ! FGM Group
+ fmuls %f0, M2, %f4 ! FGM Group
+ fmuls %f1, M4, %f6 ! FGM Group
+ fmuls %f1, M5, %f7 ! FGM Group f2 available
+ fadds %f2, M12, %f2 ! FGA
+ fmuls %f1, M6, %f8 ! FGM Group f3 available
+ fadds %f3, M13, %f3 ! FGA
+ fadds %f4, M14, %f4 ! FGA Group f4 available
+ fadds %f2, %f6, %f2 ! FGA Group stall, f2, f6, f7 available
+ st %f2, [%g2 + 0x00] ! LSU
+ fadds %f3, %f7, %f3 ! FGA Group f3, f8 available
+ st %f3, [%g2 + 0x04] ! LSU
+ fadds %f4, %f8, %f4 ! FGA Group f4 available
+ st %f4, [%g2 + 0x08] ! LSU
+
+3:
+ ba __set_v4f_3
+ nop
+
+ .globl _mesa_sparc_transform_points2_3d_no_rot
+_mesa_sparc_transform_points2_3d_no_rot:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_5_12_13_14(%o1)
+
+ cmp %g3, 1
+ st %g3, [%o0 + V4F_COUNT]
+ bl 3f
+ clr %o3
+
+ be 2f
+ andn %g3, 1, %o2
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ add %o3, 2, %o3 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ ld [%g1 + 0x00], %f4 ! LSU Group
+ fmuls %f0, M0, %f2 ! FGM
+ ld [%g1 + 0x04], %f5 ! LSU Group
+ fmuls %f1, M5, %f3 ! FGM
+ fmuls %f4, M0, %f6 ! FGM Group
+ add %g1, %o5, %g1 ! IEU0
+ fmuls %f5, M5, %f7 ! FGM Group
+ fadds %f2, M12, %f2 ! FGA Group f2 available
+ st %f2, [%g2 + 0x00] ! LSU
+ fadds %f3, M13, %f3 ! FGA Group f3 available
+ st %f3, [%g2 + 0x04] ! LSU
+ fadds %f6, M12, %f6 ! FGA Group f6 available
+ st M14, [%g2 + 0x08] ! LSU
+ fadds %f7, M13, %f7 ! FGA Group f7 available
+ st %f6, [%g2 + 0x10] ! LSU
+ st %f7, [%g2 + 0x14] ! LSU Group
+ st M14, [%g2 + 0x18] ! LSU Group
+ cmp %o3, %o2 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x20, %g2 ! IEU0 Group
+
+ cmp %o3, %g3
+ be 3f
+ nop
+
+2: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ fmuls %f0, M0, %f2 ! FGM Group
+ fmuls %f1, M5, %f3 ! FGM Group
+ fadds %f2, M12, %f2 ! FGA Group, 2 cycle stall, f2 available
+ st %f2, [%g2 + 0x00] ! LSU
+ fadds %f3, M13, %f3 ! FGA Group f3 available
+ st %f3, [%g2 + 0x04] ! LSU
+ st M14, [%g2 + 0x08] ! LSU Group
+
+3: ld [%o1 + (14 * 0x4)], %g3
+ cmp %g3, 0
+ bne __set_v4f_3
+ nop
+ ba __set_v4f_2
+ nop
+
+ .globl _mesa_sparc_transform_points2_perspective
+_mesa_sparc_transform_points2_perspective:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_5_14(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0
+ ld [%g1 + 0x04], %f1
+ add %o1, 1, %o1
+ add %g1, %o5, %g1
+ fmuls %f0, M0, %f2
+ st %f2, [%g2 + 0x00]
+ fmuls %f1, M5, %f3
+ st %f3, [%g2 + 0x04]
+ st M14, [%g2 + 0x08]
+ st %g0, [%g2 + 0x0c]
+ cmp %o1, %g3
+ bne 1b
+ add %g2, 0x10, %g2
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points3_general
+_mesa_sparc_transform_points3_general:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f3 ! FGM
+ fmuls %f1, M4, %f7 ! FGM Group
+ fmuls %f0, M1, %f4 ! FGM Group
+ fmuls %f1, M5, %f8 ! FGM Group
+ fmuls %f0, M2, %f5 ! FGM Group f3 available
+ fmuls %f1, M6, %f9 ! FGM Group f7 available
+ fadds %f3, %f7, %f3 ! FGA
+ fmuls %f0, M3, %f6 ! FGM Group f4 available
+ fmuls %f1, M7, %f10 ! FGM Group f8 available
+ fadds %f4, %f8, %f4 ! FGA
+ fmuls %f2, M8, %f7 ! FGM Group f5 available
+ fmuls %f2, M9, %f8 ! FGM Group f9,f3 available
+ fadds %f5, %f9, %f5 ! FGA
+ fmuls %f2, M10, %f9 ! FGM Group f6 available
+ fadds %f6, %f10, %f6 ! FGA Group f10,f4 available
+ fmuls %f2, M11, %f10 ! FGM
+ fadds %f3, M12, %f3 ! FGA Group f7 available
+ fadds %f4, M13, %f4 ! FGA Group f8,f5 available
+ fadds %f5, M14, %f5 ! FGA Group f9 available
+ fadds %f6, M15, %f6 ! FGA Group f10,f6 available
+ fadds %f3, %f7, %f3 ! FGA Group f3 available
+ st %f3, [%g2 + 0x00] ! LSU
+ fadds %f4, %f8, %f4 ! FGA Group f4 available
+ st %f4, [%g2 + 0x04] ! LSU
+ fadds %f5, %f9, %f5 ! FGA Group f5 available
+ st %f5, [%g2 + 0x08] ! LSU
+ fadds %f6, %f10, %f6 ! FGA Group f6 available
+ st %f6, [%g2 + 0x0c] ! LSU
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points3_identity
+_mesa_sparc_transform_points3_identity:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0
+ ld [%g1 + 0x04], %f1
+ ld [%g1 + 0x08], %f2
+ add %o1, 1, %o1
+ add %g1, %o5, %g1
+ cmp %o1, %g3
+ st %f0, [%g2 + 0x00]
+ st %f1, [%g2 + 0x04]
+ st %f2, [%g2 + 0x08]
+ bne 1b
+ add %g2, 0x10, %g2
+2:
+ ba __set_v4f_3
+ nop
+
+ .globl _mesa_sparc_transform_points3_2d
+_mesa_sparc_transform_points3_2d:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_4_5_12_13(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f3 ! FGM
+ fmuls %f0, M1, %f4 ! FGM Group
+ fmuls %f1, M4, %f6 ! FGM Group
+ fmuls %f1, M5, %f7 ! FGM Group
+ fadds %f3, M12, %f3 ! FGA Group f3 available
+ fadds %f4, M13, %f4 ! FGA Group f4 available
+ fadds %f3, %f6, %f3 ! FGA Group f6 available
+ st %f3, [%g2 + 0x00] ! LSU
+ fadds %f4, %f7, %f4 ! FGA Group f7 available
+ st %f4, [%g2 + 0x04] ! LSU
+ st %f2, [%g2 + 0x08] ! LSU Group
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_3
+ nop
+
+ .globl _mesa_sparc_transform_points3_2d_no_rot
+_mesa_sparc_transform_points3_2d_no_rot:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_5_12_13(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f3 ! FGM
+ fmuls %f1, M5, %f4 ! FGM Group
+ st %f2, [%g2 + 0x08] ! LSU
+ fadds %f3, M12, %f3 ! FGA Group
+ st %f3, [%g2 + 0x00] ! LSU
+ fadds %f4, M13, %f4 ! FGA Group
+ st %f4, [%g2 + 0x04] ! LSU
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_3
+ nop
+
+ .globl _mesa_sparc_transform_points3_3d
+_mesa_sparc_transform_points3_3d:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f3 ! FGM
+ fmuls %f1, M4, %f6 ! FGM Group
+ fmuls %f0, M1, %f4 ! FGM Group
+ fmuls %f1, M5, %f7 ! FGM Group
+ fmuls %f0, M2, %f5 ! FGM Group f3 available
+ fmuls %f1, M6, %f8 ! FGM Group f6 available
+ fadds %f3, %f6, %f3 ! FGA
+ fmuls %f2, M8, %f9 ! FGM Group f4 available
+ fmuls %f2, M9, %f10 ! FGM Group f7 available
+ fadds %f4, %f7, %f4 ! FGA
+ fmuls %f2, M10, %f11 ! FGM Group f5 available
+ fadds %f5, %f8, %f5 ! FGA Group f8, f3 available
+ fadds %f3, %f9, %f3 ! FGA Group f9 available
+ fadds %f4, %f10, %f4 ! FGA Group f10, f4 available
+ fadds %f5, %f11, %f5 ! FGA Group stall, f11, f5 available
+ fadds %f3, M12, %f3 ! FGA Group f3 available
+ st %f3, [%g2 + 0x00] ! LSU
+ fadds %f4, M13, %f4 ! FGA Group f4 available
+ st %f4, [%g2 + 0x04] ! LSU
+ fadds %f5, M14, %f5 ! FGA Group f5 available
+ st %f5, [%g2 + 0x08] ! LSU
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_3
+ nop
+
+ .globl _mesa_sparc_transform_points3_3d_no_rot
+_mesa_sparc_transform_points3_3d_no_rot:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_5_10_12_13_14(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ cmp %o1, %g3 ! IEU1 Group
+ fmuls %f0, M0, %f3 ! FGM
+ fmuls %f1, M5, %f4 ! FGM Group
+ fmuls %f2, M10, %f5 ! FGM Group
+ fadds %f3, M12, %f3 ! FGA Group, stall, f3 available
+ st %f3, [%g2 + 0x00] ! LSU
+ fadds %f4, M13, %f4 ! FGA Group, f4 available
+ st %f4, [%g2 + 0x04] ! LSU
+ fadds %f5, M14, %f5 ! FGA Group, f5 available
+ st %f5, [%g2 + 0x08] ! LEU
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_3
+ nop
+
+ .globl _mesa_sparc_transform_points3_perspective
+_mesa_sparc_transform_points3_perspective:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_5_8_9_10_14(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f3 ! FGM
+ fmuls %f2, M8, %f6 ! FGM Group
+ fmuls %f1, M5, %f4 ! FGM Group
+ fmuls %f2, M9, %f7 ! FGM Group
+ fmuls %f2, M10, %f5 ! FGM Group f3 available
+ fadds %f3, %f6, %f3 ! FGA Group f6 available
+ st %f3, [%g2 + 0x00] ! LSU
+ fadds %f4, %f7, %f4 ! FGA Group stall, f4, f7 available
+ st %f4, [%g2 + 0x04] ! LSU
+ fadds %f5, M14, %f5 ! FGA Group
+ st %f5, [%g2 + 0x08] ! LSU
+ fnegs %f2, %f6 ! FGA Group
+ st %f6, [%g2 + 0x0c] ! LSU
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points4_general
+_mesa_sparc_transform_points4_general:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ ld [%g1 + 0x0c], %f3 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f4 ! FGM Group
+ fmuls %f1, M4, %f8 ! FGM Group
+ fmuls %f0, M1, %f5 ! FGM Group
+ fmuls %f1, M5, %f9 ! FGM Group
+ fmuls %f0, M2, %f6 ! FGM Group f4 available
+ fmuls %f1, M6, %f10 ! FGM Group f8 available
+ fadds %f4, %f8, %f4 ! FGA
+ fmuls %f0, M3, %f7 ! FGM Group f5 available
+ fmuls %f1, M7, %f11 ! FGM Group f9 available
+ fadds %f5, %f9, %f5 ! FGA
+ fmuls %f2, M8, %f12 ! FGM Group f6 available
+ fmuls %f2, M9, %f13 ! FGM Group f10, f4 available
+ fadds %f6, %f10, %f6 ! FGA
+ fmuls %f2, M10, %f14 ! FGM Group f7 available
+ fmuls %f2, M11, %f15 ! FGM Group f11, f5 available
+ fadds %f7, %f11, %f7 ! FGA
+ fmuls %f3, M12, %f8 ! FGM Group f12 available
+ fadds %f4, %f12, %f4 ! FGA
+ fmuls %f3, M13, %f9 ! FGM Group f13, f6 available
+ fadds %f5, %f13, %f5 ! FGA
+ fmuls %f3, M14, %f10 ! FGM Group f14 available
+ fadds %f6, %f14, %f6 ! FGA
+ fmuls %f3, M15, %f11 ! FGM Group f15, f7 available
+ fadds %f7, %f15, %f7 ! FGA
+ fadds %f4, %f8, %f4 ! FGA Group f8, f4 available
+ st %f4, [%g2 + 0x00] ! LSU
+ fadds %f5, %f9, %f5 ! FGA Group f9, f5 available
+ st %f5, [%g2 + 0x04] ! LSU
+ fadds %f6, %f10, %f6 ! FGA Group f10, f6 available
+ st %f6, [%g2 + 0x08] ! LSU
+ fadds %f7, %f11, %f7 ! FGA Group f11, f7 available
+ st %f7, [%g2 + 0x0c] ! LSU
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points4_identity
+_mesa_sparc_transform_points4_identity:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0
+ ld [%g1 + 0x04], %f1
+ ld [%g1 + 0x08], %f2
+ add %o1, 1, %o1
+ ld [%g1 + 0x0c], %f3
+ add %g1, %o5, %g1
+ st %f0, [%g2 + 0x00]
+ st %f1, [%g2 + 0x04]
+ st %f2, [%g2 + 0x08]
+ cmp %o1, %g3
+ st %f3, [%g2 + 0x0c]
+ bne 1b
+ add %g2, 0x10, %g2
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points4_2d
+_mesa_sparc_transform_points4_2d:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_4_5_12_13(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ ld [%g1 + 0x0c], %f3 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f4 ! FGM
+ fmuls %f1, M4, %f8 ! FGM Group
+ fmuls %f0, M1, %f5 ! FGM Group
+ fmuls %f1, M5, %f9 ! FGM Group f4 available
+ fmuls %f3, M12, %f12 ! FGM Group
+ fmuls %f3, M13, %f13 ! FGM Group f8 available
+ fadds %f4, %f8, %f4 ! FGA
+ fadds %f5, %f9, %f5 ! FGA Group stall, f5, f9 available
+ fadds %f4, %f12, %f4 ! FGA Group 2 cycle stall, f4, f12, f13 avail
+ st %f4, [%g2 + 0x00] ! LSU
+ fadds %f5, %f13, %f5 ! FGA Group f5 available
+ st %f5, [%g2 + 0x04] ! LSU
+ st %f2, [%g2 + 0x08] ! LSU Group
+ st %f3, [%g2 + 0x0c] ! LSU Group
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points4_2d_no_rot
+_mesa_sparc_transform_points4_2d_no_rot:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_4_5_12_13(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0
+ ld [%g1 + 0x04], %f1
+ ld [%g1 + 0x08], %f2
+ ld [%g1 + 0x0c], %f3
+ add %o1, 1, %o1
+ add %g1, %o5, %g1
+ fmuls %f0, M0, %f4
+ fmuls %f3, M12, %f8
+ fmuls %f1, M5, %f5
+ fmuls %f3, M13, %f9
+ fadds %f4, %f8, %f4
+ st %f4, [%g2 + 0x00]
+ fadds %f5, %f9, %f5
+ st %f5, [%g2 + 0x04]
+ st %f2, [%g2 + 0x08]
+ st %f3, [%g2 + 0x0c]
+ cmp %o1, %g3
+ bne 1b
+ add %g2, 0x10, %g2
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points4_3d
+_mesa_sparc_transform_points4_3d:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ ld [%g1 + 0x0c], %f3 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f4 ! FGM
+ fmuls %f1, M4, %f7 ! FGM Group
+ fmuls %f0, M1, %f5 ! FGM Group
+ fmuls %f1, M5, %f8 ! FGM Group
+ fmuls %f0, M2, %f6 ! FGM Group f4 available
+ fmuls %f1, M6, %f9 ! FGM Group f7 available
+ fadds %f4, %f7, %f4 ! FGA
+ fmuls %f2, M8, %f10 ! FGM Group f5 available
+ fmuls %f2, M9, %f11 ! FGM Group f8 available
+ fadds %f5, %f8, %f5 ! FGA
+ fmuls %f2, M10, %f12 ! FGM Group f6 available
+ fmuls %f3, M12, %f13 ! FGM Group f9, f4 available
+ fadds %f6, %f9, %f6 ! FGA
+ fmuls %f3, M13, %f14 ! FGM Group f10 available
+ fadds %f4, %f10, %f4 ! FGA
+ fmuls %f3, M14, %f15 ! FGM Group f11, f5 available
+ fadds %f5, %f11, %f5 ! FGA
+ fadds %f6, %f12, %f6 ! FGA Group stall, f12, f13, f6 available
+ fadds %f4, %f13, %f4 ! FGA Group f14, f4 available
+ st %f4, [%g2 + 0x00] ! LSU
+ fadds %f5, %f14, %f5 ! FGA Group f15, f5 available
+ st %f5, [%g2 + 0x04] ! LSU
+ fadds %f6, %f15, %f6 ! FGA Group f6 available
+ st %f6, [%g2 + 0x08] ! LSU
+ st %f3, [%g2 + 0x0c] ! LSU Group
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points4_3d_no_rot
+_mesa_sparc_transform_points4_3d_no_rot:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_5_10_12_13_14(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ ld [%g1 + 0x0c], %f3 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f4 ! FGM
+ fmuls %f3, M12, %f7 ! FGM Group
+ fmuls %f1, M5, %f5 ! FGM Group
+ fmuls %f3, M13, %f8 ! FGM Group
+ fmuls %f2, M10, %f6 ! FGM Group f4 available
+ fmuls %f3, M14, %f9 ! FGM Group f7 available
+ fadds %f4, %f7, %f4 ! FGA
+ st %f4, [%g2 + 0x00] ! LSU
+ fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
+ st %f5, [%g2 + 0x04] ! LSU
+ fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
+ st %f6, [%g2 + 0x08] ! LSU
+ st %f3, [%g2 + 0x0c] ! LSU Group
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_4
+ nop
+
+ .globl _mesa_sparc_transform_points4_perspective
+_mesa_sparc_transform_points4_perspective:
+ ld [%o2 + V4F_STRIDE], %o5
+ LDPTR [%o2 + V4F_START], %g1
+ LDPTR [%o0 + V4F_START], %g2
+ ld [%o2 + V4F_COUNT], %g3
+
+ LDMATRIX_0_5_8_9_10_14(%o1)
+
+ cmp %g3, 0
+ st %g3, [%o0 + V4F_COUNT]
+ be 2f
+ clr %o1
+
+1: ld [%g1 + 0x00], %f0 ! LSU Group
+ ld [%g1 + 0x04], %f1 ! LSU Group
+ ld [%g1 + 0x08], %f2 ! LSU Group
+ ld [%g1 + 0x0c], %f3 ! LSU Group
+ add %o1, 1, %o1 ! IEU0
+ add %g1, %o5, %g1 ! IEU1
+ fmuls %f0, M0, %f4 ! FGM
+ fmuls %f2, M8, %f7 ! FGM Group
+ fmuls %f1, M5, %f5 ! FGM Group
+ fmuls %f2, M9, %f8 ! FGM Group
+ fmuls %f2, M10, %f6 ! FGM Group f4 available
+ fmuls %f3, M14, %f9 ! FGM Group f7 available
+ fadds %f4, %f7, %f4 ! FGA
+ st %f4, [%g2 + 0x00] ! LSU
+ fadds %f5, %f8, %f5 ! FGA Group stall, f5, f8 available
+ st %f5, [%g2 + 0x04] ! LSU
+ fadds %f6, %f9, %f6 ! FGA Group stall, f6, f9 available
+ st %f6, [%g2 + 0x08] ! LSU
+ fnegs %f2, %f7 ! FGA Group
+ st %f7, [%g2 + 0x0c] ! LSU
+ cmp %o1, %g3 ! IEU1
+ bne 1b ! CTI
+ add %g2, 0x10, %g2 ! IEU0 Group
+2:
+ ba __set_v4f_4
+ nop