/* csum_copy.S: Checksum+copy code for sparc64
 *
 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
 */

#include <asm/export.h>

#ifdef __KERNEL__
#define GLOBAL_SPARE	%g7
#else
#define GLOBAL_SPARE	%g5
#endif

#ifndef EX_LD
#define EX_LD(x)	x
#endif

#ifndef EX_ST
#define EX_ST(x)	x
#endif

#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef STORE
#define STORE(type,src,addr)	type src, [addr]
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	csum_partial_copy_nocheck
#endif
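
/* For the plain in-kernel copy these EX_LD/EX_ST, LOAD/STORE and
 * FUNC_NAME defaults are used as-is.  The user-copy variants
 * (csum_copy_from_user.S, csum_copy_to_user.S) redefine them before
 * including this file, adding exception-table entries and
 * ASI-qualified accesses.
 */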

	.register	%g2, #scratch
	.register	%g3, #scratch

	.text

90:
	/* We checked for zero length already, so there must be
	 * at least one byte.  Copy a byte if the source address is
	 * odd, then a halfword if it is still not 4-byte aligned,
	 * folding what we copy into the running sum in %o4.
	 */
	be,pt		%icc, 1f
	 nop
	EX_LD(LOAD(ldub, %o0 + 0x00, %o4))
	add		%o0, 1, %o0
	sub		%o2, 1, %o2
	EX_ST(STORE(stb, %o4, %o1 + 0x00))
	add		%o1, 1, %o1
1:	andcc		%o0, 0x2, %g0
	be,pn		%icc, 80f
	 cmp		%o2, 2
	blu,pn		%icc, 60f
	 nop
	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
	add		%o0, 2, %o0
	sub		%o2, 2, %o2
	EX_ST(STORE(sth, %o5, %o1 + 0x00))
	add		%o1, 2, %o1
	ba,pt		%xcc, 80f
	 add		%o5, %o4, %o4

	.globl	FUNC_NAME
	.type	FUNC_NAME,#function
	EXPORT_SYMBOL(FUNC_NAME)
FUNC_NAME:	/* %o0=src, %o1=dst, %o2=len, %o3=sum */
	LOAD(prefetch, %o0 + 0x000, #n_reads)
	xor		%o0, %o1, %g1
	clr		%o4
	andcc		%g1, 0x3, %g0
	bne,pn		%icc, 95f
	 LOAD(prefetch, %o0 + 0x040, #n_reads)

	brz,pn		%o2, 70f
	 andcc		%o0, 0x3, %g0

	/* We "remember" in GLOBAL_SPARE whether the lowest bit of the
	 * address was set; if it was, the upper and lower 8-bit halves
	 * of the sum we calculate must be swapped at the end.
	 */
	bne,pn		%icc, 90b
	 andcc		%o0, 0x1, GLOBAL_SPARE

80:
	LOAD(prefetch, %o0 + 0x080, #n_reads)
	andncc		%o2, 0x3f, %g3

	LOAD(prefetch, %o0 + 0x0c0, #n_reads)
	sub		%o2, %g3, %o2
	brz,pn		%g3, 2f
	 LOAD(prefetch, %o0 + 0x100, #n_reads)

	/* So that we don't need to use the non-pairing
	 * add-with-carry instructions we accumulate 32-bit
	 * values into a 64-bit register.  At the end of the
	 * loop we fold it down to 32 bits and so on.
	 */
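	/* Each iteration adds sixteen 32-bit values, so the 64-bit
	 * accumulator grows by at most 16 * 0xffffffff per pass and
	 * cannot overflow for any length expressible in %o2.
	 */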
	ba,pt		%xcc, 1f
	 LOAD(prefetch, %o0 + 0x140, #n_reads)

	.align		32
1:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
	EX_LD(LOAD(lduw, %o0 + 0x04, %g1))
	EX_LD(LOAD(lduw, %o0 + 0x08, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x00))
	EX_LD(LOAD(lduw, %o0 + 0x0c, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x04))
	EX_LD(LOAD(lduw, %o0 + 0x10, %g1))
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x08))
	EX_LD(LOAD(lduw, %o0 + 0x14, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x0c))
	EX_LD(LOAD(lduw, %o0 + 0x18, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x10))
	EX_LD(LOAD(lduw, %o0 + 0x1c, %g1))
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x14))
	EX_LD(LOAD(lduw, %o0 + 0x20, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x18))
	EX_LD(LOAD(lduw, %o0 + 0x24, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x1c))
	EX_LD(LOAD(lduw, %o0 + 0x28, %g1))
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x20))
	EX_LD(LOAD(lduw, %o0 + 0x2c, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x24))
	EX_LD(LOAD(lduw, %o0 + 0x30, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x28))
	EX_LD(LOAD(lduw, %o0 + 0x34, %g1))
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x2c))
	EX_LD(LOAD(lduw, %o0 + 0x38, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x30))
	EX_LD(LOAD(lduw, %o0 + 0x3c, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x34))
	LOAD(prefetch, %o0 + 0x180, #n_reads)
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x38))
	subcc		%g3, 0x40, %g3
	add		%o0, 0x40, %o0
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x3c))
	bne,pt		%icc, 1b
	 add		%o1, 0x40, %o1
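	/* Note the interleaving above: each loaded word is summed and
	 * stored a few instructions after its load, presumably to hide
	 * load-use latency on the dual-issue UltraSPARC pipelines this
	 * was tuned for.
	 */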

2:	and		%o2, 0x3c, %g3
	brz,pn		%g3, 2f
	 sub		%o2, %g3, %o2
1:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
	subcc		%g3, 0x4, %g3
	add		%o0, 0x4, %o0
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x00))
	bne,pt		%icc, 1b
	 add		%o1, 0x4, %o1

2:
	/* fold 64-->32 */
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
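	/* Two rounds are needed: e.g. %o4 = 0x00000002fffffffe gives
	 * 0x2 + 0xfffffffe = 0x100000000 after the first round, whose
	 * high word must itself be folded in, yielding 0x00000001.
	 */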

	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
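	/* Again two rounds: e.g. %o4 = 0x0003fffe folds to
	 * 0x3 + 0xfffe = 0x10001, which the second round reduces
	 * to 0x0002.
	 */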

60:
	/* %o4 has the 16-bit sum we have calculated so far. */
	cmp		%o2, 2
	blu,pt		%icc, 1f
	 nop
	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
	sub		%o2, 2, %o2
	add		%o0, 2, %o0
	add		%o4, %o5, %o4
	EX_ST(STORE(sth, %o5, %o1 + 0x00))
	add		%o1, 0x2, %o1
1:	brz,pt		%o2, 1f
	 nop
	EX_LD(LOAD(ldub, %o0 + 0x00, %o5))
	sub		%o2, 1, %o2
	add		%o0, 1, %o0
	EX_ST(STORE(stb, %o5, %o1 + 0x00))
	sllx		%o5, 8, %o5	/* final byte fills the high half of its 16-bit word */
	add		%o1, 1, %o1
	add		%o4, %o5, %o4
1:
	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

1:	brz,pt		GLOBAL_SPARE, 1f
	 nop

	/* We started with an odd byte, byte-swap the result. */
	srl		%o4, 8, %o5
	and		%o4, 0xff, %g1
	sll		%g1, 8, %g1
	or		%o5, %g1, %o4
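	/* Summing the same bytes starting one byte later puts every
	 * byte in the opposite half of its 16-bit word; in ones-
	 * complement arithmetic that simply byte-swaps the result,
	 * so swapping back here recovers the sum the caller expects.
	 */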

1:	addcc		%o3, %o4, %o3	/* fold our sum into the caller's */
	addc		%g0, %o3, %o3	/* ... with end-around carry */

70:
	retl
	 srl		%o3, 0, %o0	/* return the 32-bit partial sum */

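	/* Slow path: src and dst differ in their low two address bits,
	 * so both can never be word-aligned at once.  Do naturally
	 * aligned loads from src and byte-sized stores to dst.
	 */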
95:	mov		0, GLOBAL_SPARE
	brlez,pn	%o2, 4f
	 andcc		%o0, 1, %o5	/* remember if src starts odd */
	be,a,pt		%icc, 1f
	 srl		%o2, 1, %g1
	sub		%o2, 1, %o2	/* copy one byte to make src even */
	EX_LD(LOAD(ldub, %o0, GLOBAL_SPARE))
	add		%o0, 1, %o0
	EX_ST(STORE(stb, GLOBAL_SPARE, %o1))
	srl		%o2, 1, %g1
	add		%o1, 1, %o1
1:	brz,a,pn	%g1, 3f
	 andcc		%o2, 1, %g0
	andcc		%o0, 2, %g0
	be,a,pt		%icc, 1f
	 srl		%g1, 1, %g1
	EX_LD(LOAD(lduh, %o0, %o4))	/* halfword to make src 4-byte aligned */
	sub		%o2, 2, %o2
	srl		%o4, 8, %g2
	sub		%g1, 1, %g1
	EX_ST(STORE(stb, %g2, %o1))	/* dst may be unaligned, store bytes */
	add		%o4, GLOBAL_SPARE, GLOBAL_SPARE
	EX_ST(STORE(stb, %o4, %o1 + 1))
	add		%o0, 2, %o0
	srl		%g1, 1, %g1
	add		%o1, 2, %o1
1:	brz,a,pn	%g1, 2f
	 andcc		%o2, 2, %g0
	EX_LD(LOAD(lduw, %o0, %o4))
5:	srl		%o4, 24, %g2	/* word loop: aligned loads, byte stores */
	srl		%o4, 16, %g3
	EX_ST(STORE(stb, %g2, %o1))
	srl		%o4, 8, %g2
	EX_ST(STORE(stb, %g3, %o1 + 1))
	add		%o0, 4, %o0
	EX_ST(STORE(stb, %g2, %o1 + 2))
	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE
	EX_ST(STORE(stb, %o4, %o1 + 3))
	addc		GLOBAL_SPARE, %g0, GLOBAL_SPARE	/* end-around carry */
	add		%o1, 4, %o1
	subcc		%g1, 1, %g1
	bne,a,pt	%icc, 5b
	 EX_LD(LOAD(lduw, %o0, %o4))
	sll		GLOBAL_SPARE, 16, %g2	/* fold the 32-bit sum: low + high halves */
	srl		GLOBAL_SPARE, 16, GLOBAL_SPARE
	srl		%g2, 16, %g2
	andcc		%o2, 2, %g0
	add		%g2, GLOBAL_SPARE, GLOBAL_SPARE
2:	be,a,pt		%icc, 3f
	 andcc		%o2, 1, %g0
	EX_LD(LOAD(lduh, %o0, %o4))	/* trailing halfword */
	andcc		%o2, 1, %g0
	srl		%o4, 8, %g2
	add		%o0, 2, %o0
	EX_ST(STORE(stb, %g2, %o1))
	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
	EX_ST(STORE(stb, %o4, %o1 + 1))
	add		%o1, 2, %o1
3:	be,a,pt		%icc, 1f
	 sll		GLOBAL_SPARE, 16, %o4
	EX_LD(LOAD(ldub, %o0, %g2))	/* trailing byte */
	sll		%g2, 8, %o4
	EX_ST(STORE(stb, %g2, %o1))
	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
	sll		GLOBAL_SPARE, 16, %o4
1:	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE	/* final fold down to 16 bits */
	srl		GLOBAL_SPARE, 16, %o4
	addc		%g0, %o4, GLOBAL_SPARE
	brz,pt		%o5, 4f
	 srl		GLOBAL_SPARE, 8, %o4
	and		GLOBAL_SPARE, 0xff, %g2	/* src started odd, swap the halves */
	and		%o4, 0xff, %o4
	sll		%g2, 8, %g2
	or		%g2, %o4, GLOBAL_SPARE
4:	addcc		%o3, GLOBAL_SPARE, %o3	/* add into caller's sum */
	addc		%g0, %o3, %o0
	retl
	 srl		%o0, 0, %o0
	.size		FUNC_NAME, .-FUNC_NAME
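
/* C prototype (as declared in this kernel tree's checksum_64.h):
 *
 *	__wsum csum_partial_copy_nocheck(const void *src, void *dst,
 *					 int len, __wsum sum);
 *
 * with %o0=src, %o1=dst, %o2=len, %o3=sum on entry and the updated
 * 32-bit partial sum returned in %o0.
 */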