1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
/*
* Clip testing in SPARC assembly
*/
/*
 * Build-dependent definitions used with the GLvector4f pointers handled in
 * this file:
 *   LDPTR   - load mnemonic used for pointer-sized fields (ldx on 64-bit
 *             builds, ld otherwise).
 *   V4F_*   - byte offsets added to a GLvector4f pointer to reach a field.
 * NOTE(review): the offsets presumably mirror the C layout of GLvector4f
 * (data, start, count, stride, size, flags) -- verify against the struct
 * definition whenever that struct changes.
 */
#if __arch64__
/* 64-bit build: the first two fields are 8 bytes apart. */
#define LDPTR ldx
#define V4F_DATA 0x00
#define V4F_START 0x08
#define V4F_COUNT 0x10
#define V4F_STRIDE 0x14
#define V4F_SIZE 0x18
#define V4F_FLAGS 0x1c
#else
/* 32-bit build: every field is 4 bytes apart. */
#define LDPTR ld
#define V4F_DATA 0x00
#define V4F_START 0x04
#define V4F_COUNT 0x08
#define V4F_STRIDE 0x0c
#define V4F_SIZE 0x10
#define V4F_FLAGS 0x14
#endif
/*
 * Values for the flags field; _mesa_sparc_cliptest_points4 ORs VEC_SIZE_4
 * into the flags of its destination vector.
 * NOTE(review): these look like masks with the low n bits set, one bit per
 * valid component -- confirm against the C headers.
 */
#define VEC_SIZE_1 1
#define VEC_SIZE_2 3
#define VEC_SIZE_3 7
#define VEC_SIZE_4 15
/* Declare %g2 and %g3 to the assembler as scratch registers; the routines
 * below use both as temporaries. */
.register %g2, #scratch
.register %g3, #scratch
/*
 * Constant data kept in .text so the code can address it relative to its own
 * program counter (see the call to __pc_tramp in the entry points).
 *
 * Layout matters: the entry points compute the address of one_dot_zero and
 * then add 4 to reach clip_table, so clip_table must stay exactly one word
 * after one_dot_zero.
 */
.text
.align 64
/* 1.0f: loaded into %f4 by _mesa_sparc_cliptest_points4, where it is the
 * numerator of 1/w and the w value stored for clipped vertices. */
one_dot_zero:
.word 0x3f800000 /* 1.0f */
/* This trick is shamelessly stolen from the x86
* Mesa asm. Very clever, and we can do it too
* since we have the necessary add with carry
* instructions on Sparc.
*
* clip_table maps a 7-bit index to the clip mask byte of one vertex
* (128 entries, 16 rows of 8).  The loops build the index one bit at a
* time with addx, most significant bit first:
*   bit 6: sign bit of w
*   bit 5: sign bit of z
*   bit 4: carry of (w << 1) - (z << 1), i.e. the w bits with the sign
*          shifted out are below the z bits, compared as unsigned
*   bit 3: sign bit of y
*   bit 2: same comparison for y
*   bit 1: sign bit of x
*   bit 0: same comparison for x
* An entry of 0 means the vertex is treated as unclipped.
* NOTE(review): the non-zero entries are presumably OR-combinations of the
* CLIP_*_BIT plane flags from the C side -- confirm against the C headers.
*/
clip_table:
.byte 0, 1, 0, 2, 4, 5, 4, 6
.byte 0, 1, 0, 2, 8, 9, 8, 10
.byte 32, 33, 32, 34, 36, 37, 36, 38
.byte 32, 33, 32, 34, 40, 41, 40, 42
.byte 0, 1, 0, 2, 4, 5, 4, 6
.byte 0, 1, 0, 2, 8, 9, 8, 10
.byte 16, 17, 16, 18, 20, 21, 20, 22
.byte 16, 17, 16, 18, 24, 25, 24, 26
.byte 63, 61, 63, 62, 55, 53, 55, 54
.byte 63, 61, 63, 62, 59, 57, 59, 58
.byte 47, 45, 47, 46, 39, 37, 39, 38
.byte 47, 45, 47, 46, 43, 41, 43, 42
.byte 63, 61, 63, 62, 55, 53, 55, 54
.byte 63, 61, 63, 62, 59, 57, 59, 58
.byte 31, 29, 31, 30, 23, 21, 23, 22
.byte 31, 29, 31, 30, 27, 25, 27, 26
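/* Arguments taken, in this order, by both cliptest entry points below
   (_mesa_sparc_cliptest_points4 and _mesa_sparc_cliptest_points4_np):
   GLvector4f *clip_vec, GLvector4f *proj_vec,
   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask */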
/*
 * __pc_tramp -- empty leaf routine that returns straight to its caller.
 *
 * The cliptest entry points call it only for the side effect of the call
 * instruction: %o7 receives the address of the call, and the instruction in
 * the call delay slot subtracts an assemble-time distance from it to obtain
 * the address of one_dot_zero without any absolute relocation.
 */
.align 64
__pc_tramp:
retl
/* delay slot of retl */
nop
/*
 * _mesa_sparc_cliptest_points4 -- clip-test 4-component points and write
 * their perspective-divided form.
 *
 * In (register names as seen after the save):
 *   %i0  clip_vec   source GLvector4f
 *   %i1  proj_vec   destination GLvector4f
 *   %i2  clipMask   output, one byte per vertex
 *   %i3  orMask     in/out byte: OR of all vertex masks
 *   %i4  andMask    in/out byte: AND of the non-zero vertex masks
 * Out:
 *   %o0  proj_vec
 * Clobbers: %g1-%g5, %f0-%f4, %f8, integer condition codes.
 *
 * The destination header is updated first: VEC_SIZE_4 is OR-ed into its
 * flags, its size is set to 3 and its count is copied from the source.
 *
 * Then, for each of the source count vertices (x, y, z, w as 32-bit words at
 * offsets 0x0, 0x4, 0x8, 0xc; consecutive vertices are stride bytes apart):
 *   - a 7-bit index is built from the sign bits of w, z, y, x and from
 *     unsigned comparisons of the w bits against the z, y, x bits (sign
 *     shifted out), and the mask byte clip_table[index] is stored to
 *     clipMask[i];
 *   - mask != 0: the vertex is counted as clipped, the mask is folded into
 *     the running OR/AND values and (0, 0, 0, 1.0) is written to the
 *     destination;
 *   - mask == 0: (x/w, y/w, z/w, 1/w) is written to the destination.
 *
 * Finally *orMask receives the running OR, and *andMask receives the running
 * AND, or 0 when fewer than count vertices were clipped.
 *
 * The vertex loop tests its exit condition at the bottom, so a count of zero
 * is handled explicitly: the loop is skipped and only the header and mask
 * write-back are performed.  Without that guard a zero count would process
 * one vertex and then keep iterating until the 32-bit counter wrapped.
 *
 * NOTE(review): the 64-byte frame matches the 32-bit register window save
 * area; confirm it is sufficient when built with __arch64__.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save	%sp, -64, %sp
	/* %o7 = address of the call; the delay slot turns it into the
	 * address of one_dot_zero ("." is the address of the sub itself,
	 * which is 4 bytes past the call). */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	ld	[%g1 + 0x0], %f4		! %f4 = 1.0f
	add	%g1, 0x4, %g1			! %g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0		! %i0 = first source vertex
	LDPTR	[%i1 + V4F_START], %i5		! %i5 = first destination vertex
	ldub	[%i3], %g2			! incoming OR mask  (low byte)
	ldub	[%i4], %g3			! incoming AND mask (next byte)
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2

	/* Destination header: flags |= VEC_SIZE_4, size = 3, count = count. */
	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]
	clr	%l2
	clr	%l0

	/* Nothing to test when count == 0: go straight to the mask
	 * write-back instead of entering the bottom-tested loop. */
	cmp	%l3, 0
	be	4f
	 nop

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 * i5:	vProj[4][i]
	 */

	/* Index construction.  "addcc r, r, r" moves the sign bit of r into
	 * the carry and leaves the remaining bits shifted left by one;
	 * "subcc %g5, %g4" sets the carry when the shifted w bits are below
	 * the shifted coordinate bits (unsigned).  Every addx shifts %g3
	 * left by one and appends the current carry.  The fdivs producing
	 * 1/w in %f8 is issued up front; %f8 is only consumed on the
	 * unclipped path. */
1:	ld	[%i0 + 0x0c], %f3		! LSU	Group	w (float)
	ld	[%i0 + 0x0c], %g5		! LSU	Group	w (bits)
	ld	[%i0 + 0x08], %g4		! LSU	Group	z (bits)
	fdivs	%f4, %f3, %f8			! FGM		%f8 = 1.0 / w
	addcc	%g5, %g5, %g5			! IEU1	Group	carry = sign(w)
	addx	%g0, 0x0, %g3			! IEU1	Group	index = carry
	addcc	%g4, %g4, %g4			! IEU1	Group	carry = sign(z)
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group	carry = w bits < z bits
	ld	[%i0 + 0x04], %g4		! LSU	Group	y (bits)
	addx	%g3, %g3, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group	carry = sign(y)
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group	carry = w bits < y bits
	ld	[%i0 + 0x00], %g4		! LSU	Group	x (bits)
	addx	%g3, %g3, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group	carry = sign(x)
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group	carry = w bits < x bits
	addx	%g3, %g3, %g3			! IEU1	Group

	ldub	[%g1 + %g3], %g3		! LSU	Group	mask = clip_table[index]
	cmp	%g3, 0				! IEU1	Group, stall
	be	2f				! CTI		mask == 0: unclipped
	 stb	%g3, [%i2]			! LSU		clipMask[i] = mask (both paths)

	/* Clipped vertex: c++, fold the mask into the running OR (low byte)
	 * and AND (next byte), and store (0, 0, 0, 1.0). */
	sll	%g3, 8, %g4			! IEU1	Group
	add	%l2, 1, %l2			! IEU0
	st	%g0, [%i5 + 0x00]		! LSU
	or	%g4, 0xff, %g4			! IEU0	Group	keep the OR byte intact
	or	%g2, %g3, %g2			! IEU1
	st	%g0, [%i5 + 0x04]		! LSU
	and	%g2, %g4, %g2			! IEU0	Group
	st	%g0, [%i5 + 0x08]		! LSU
	b	3f				! CTI
	 st	%f4, [%i5 + 0x0c]		! LSU	Group

	/* Unclipped vertex: store (x/w, y/w, z/w, 1/w). */
2:	ld	[%i0 + 0x00], %f0		! LSU	Group
	ld	[%i0 + 0x04], %f1		! LSU	Group
	ld	[%i0 + 0x08], %f2		! LSU	Group
	fmuls	%f0, %f8, %f0			! FGM
	st	%f0, [%i5 + 0x00]		! LSU	Group
	fmuls	%f1, %f8, %f1			! FGM
	st	%f1, [%i5 + 0x04]		! LSU	Group
	fmuls	%f2, %f8, %f2			! FGM
	st	%f2, [%i5 + 0x08]		! LSU	Group
	st	%f8, [%i5 + 0x0c]		! LSU	Group

	/* Advance to the next vertex; loop until i == count. */
3:	add	%i5, 0x10, %i5			! IEU1
	add	%l0, 1, %l0			! IEU0	Group
	add	%i2, 1, %i2			! IEU0	Group
	cmp	%l0, %l3			! IEU1	Group
	bne	1b				! CTI
	 add	%i0, %l1, %i0			! IEU0	Group	source += stride

	/* Write-back: *orMask = OR byte; *andMask = (c < count) ? 0 : AND byte.
	 * The clr sits in an annulled delay slot, so it only executes when
	 * the branch is taken. */
4:	stb	%g2, [%i3]			! LSU
	srl	%g2, 8, %g3			! IEU0	Group
	cmp	%l2, %l3			! IEU1	Group
	bl,a	1f				! CTI
	 clr	%g3				! IEU0
1:	stb	%g3, [%i4]			! LSU	Group
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0		! return proj_vec
/*
 * _mesa_sparc_cliptest_points4_np -- clip-test 4-component points without
 * producing projected output.
 *
 * In (register names as seen after the save):
 *   %i0  clip_vec   source GLvector4f
 *   %i1  proj_vec   never dereferenced here, only returned
 *   %i2  clipMask   output, one byte per vertex
 *   %i3  orMask     in/out byte: OR of all vertex masks
 *   %i4  andMask    in/out byte: AND of the non-zero vertex masks
 * Out:
 *   %o0  the value passed in as proj_vec
 * Clobbers: %g1-%g5, integer condition codes.
 *
 * For each of the source count vertices (x, y, z, w as 32-bit words at
 * offsets 0x0, 0x4, 0x8, 0xc; consecutive vertices are stride bytes apart) a
 * 7-bit index is built from the sign bits of w, z, y, x and from unsigned
 * comparisons of the w bits against the z, y, x bits (sign shifted out).
 * The mask byte clip_table[index] is stored to clipMask[i]; a non-zero mask
 * is counted and folded into the running OR/AND values.
 *
 * Finally *orMask receives the running OR, and *andMask receives the running
 * AND, or 0 when fewer than count vertices had a non-zero mask.
 *
 * The vertex loop tests its exit condition at the bottom, so a count of zero
 * is handled explicitly: the loop is skipped and only the mask write-back is
 * performed.  Without that guard a zero count would process one vertex and
 * then keep iterating until the 32-bit counter wrapped.
 *
 * NOTE(review): the 64-byte frame matches the 32-bit register window save
 * area; confirm it is sufficient when built with __arch64__.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save	%sp, -64, %sp
	/* %o7 = address of the call; the delay slot turns it into the
	 * address of one_dot_zero ("." is the address of the sub itself,
	 * which is 4 bytes past the call). */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	add	%g1, 0x4, %g1			! %g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0		! %i0 = first source vertex
	ldub	[%i3], %g2			! incoming OR mask  (low byte)
	ldub	[%i4], %g3			! incoming AND mask (next byte)
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2
	clr	%l2
	clr	%l0

	/* Nothing to test when count == 0: go straight to the mask
	 * write-back instead of entering the bottom-tested loop. */
	cmp	%l3, 0
	be	3f
	 nop

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 */

	/* Index construction.  "addcc r, r, r" moves the sign bit of r into
	 * the carry and leaves the remaining bits shifted left by one;
	 * "subcc %g5, %g4" sets the carry when the shifted w bits are below
	 * the shifted coordinate bits (unsigned).  Every addx shifts %g3
	 * left by one and appends the current carry. */
1:	ld	[%i0 + 0x0c], %g5		! LSU	Group	w (bits)
	ld	[%i0 + 0x08], %g4		! LSU	Group	z (bits)
	addcc	%g5, %g5, %g5			! IEU1	Group	carry = sign(w)
	addx	%g0, 0x0, %g3			! IEU1	Group	index = carry
	addcc	%g4, %g4, %g4			! IEU1	Group	carry = sign(z)
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group	carry = w bits < z bits
	ld	[%i0 + 0x04], %g4		! LSU	Group	y (bits)
	addx	%g3, %g3, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group	carry = sign(y)
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group	carry = w bits < y bits
	ld	[%i0 + 0x00], %g4		! LSU	Group	x (bits)
	addx	%g3, %g3, %g3			! IEU1	Group
	addcc	%g4, %g4, %g4			! IEU1	Group	carry = sign(x)
	addx	%g3, %g3, %g3			! IEU1	Group
	subcc	%g5, %g4, %g0			! IEU1	Group	carry = w bits < x bits
	addx	%g3, %g3, %g3			! IEU1	Group

	ldub	[%g1 + %g3], %g3		! LSU	Group	mask = clip_table[index]
	cmp	%g3, 0				! IEU1	Group, stall
	be	2f				! CTI		mask == 0: nothing to fold
	 stb	%g3, [%i2]			! LSU		clipMask[i] = mask (both paths)

	/* Non-zero mask: c++, fold it into the running OR (low byte) and
	 * AND (next byte). */
	sll	%g3, 8, %g4			! IEU1	Group
	add	%l2, 1, %l2			! IEU0
	or	%g4, 0xff, %g4			! IEU0	Group	keep the OR byte intact
	or	%g2, %g3, %g2			! IEU1
	and	%g2, %g4, %g2			! IEU0	Group

	/* Advance to the next vertex; loop until i == count. */
2:	add	%l0, 1, %l0			! IEU0	Group
	add	%i2, 1, %i2			! IEU0	Group
	cmp	%l0, %l3			! IEU1	Group
	bne	1b				! CTI
	 add	%i0, %l1, %i0			! IEU0	Group	source += stride

	/* Write-back: *orMask = OR byte; *andMask = (c < count) ? 0 : AND byte.
	 * The clr sits in an annulled delay slot, so it only executes when
	 * the branch is taken. */
3:	stb	%g2, [%i3]			! LSU
	srl	%g2, 8, %g3			! IEU0	Group
	cmp	%l2, %l3			! IEU1	Group
	bl,a	1f				! CTI
	 clr	%g3				! IEU0
1:	stb	%g3, [%i4]			! LSU	Group
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0		! return the proj_vec argument
|