/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 */
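/*
 * Register usage (per the code below): r3 = buff, r4 = len, r5 = sum;
 * the 32-bit partial sum is returned in r3.
 *
 * Rough C-level sketch of the inner word loop, for illustration only
 * (assumes a word-aligned buffer; "csum_partial_sketch" is a made-up
 * name, and the real generic C version is lib/checksum.c):
 *
 *	static __wsum csum_partial_sketch(const void *buff, int len, __wsum sum)
 *	{
 *		const u8 *p = buff;
 *		u32 result = (u32)sum;
 *
 *		while (len >= 4) {
 *			u32 w = *(const u32 *)p;
 *
 *			result += w;
 *			if (result < w)
 *				result++;	// end-around carry
 *			p += 4;
 *			len -= 4;
 *		}
 *		// a trailing halfword/byte is added the same way
 *		return (__wsum)result;
 *	}
 */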
_GLOBAL(__csum_partial)
	subi	r3,r3,4
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8
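	/* Add in the trailing halfword and byte, if any */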
3:	andi.	r0,r4,2
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
 */
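/*
 * Each load and store in the unrolled copy loop below carries a
 * numbered label (8<n>0 .. 8<n>7) so that CSUM_COPY_16_BYTES_EXCODE
 * can emit a matching exception-table entry: faults on the loads go
 * to src_error, faults on the stores go to dst_error.
 */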
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:				\
	lwz	r7,4(r4);		\
8 ## n ## 1:				\
	lwz	r8,8(r4);		\
8 ## n ## 2:				\
	lwz	r9,12(r4);		\
8 ## n ## 3:				\
	lwzu	r10,16(r4);		\
8 ## n ## 4:				\
	stw	r7,4(r6);		\
	adde	r12,r12,r7;		\
8 ## n ## 5:				\
	stw	r8,8(r6);		\
	adde	r12,r12,r8;		\
8 ## n ## 6:				\
	stw	r9,12(r6);		\
	adde	r12,r12,r9;		\
8 ## n ## 7:				\
	stwu	r10,16(r6);		\
	adde	r12,r12,r10

#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, src_error);	\
	EX_TABLE(8 ## n ## 1b, src_error);	\
	EX_TABLE(8 ## n ## 2b, src_error);	\
	EX_TABLE(8 ## n ## 3b, src_error);	\
	EX_TABLE(8 ## n ## 4b, dst_error);	\
	EX_TABLE(8 ## n ## 5b, dst_error);	\
	EX_TABLE(8 ## n ## 6b, dst_error);	\
	EX_TABLE(8 ## n ## 7b, dst_error);

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"checksum_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

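/*
 * On entry: r3 = src, r4 = dst, r5 = len, r6 = sum, r7 = src_err,
 * r8 = dst_err.  The running sum is kept in r12 and the accumulated
 * result is returned in r3; the two error pointers are saved on the
 * stack for the fault handlers below.
 */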
_GLOBAL(csum_partial_copy_generic)
	stwu	r1,-16(r1)
	stw	r7,12(r1)
	stw	r8,8(r1)

	addic	r12,r6,0
	addi	r6,r4,-4
	neg	r0,r4
	addi	r4,r3,-4
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq
	beq	58f

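	/*
	 * Copy just enough to bring the destination up to a cache line
	 * boundary: single bytes to reach a word boundary first, then
	 * whole words.
	 */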
	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	rlwinm	r7,r6,3,0x8
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even? */
	andi.	r8,r0,3		/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0
70:	lbz	r9,4(r4)	/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

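	/*
	 * Main loop: copy whole cache lines, prefetching the source
	 * with dcbt and establishing each destination line with dcbz
	 * so it is not fetched from memory before being overwritten.
	 */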
58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

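	/*
	 * Copy r0 - r7 cache lines here; the final r7 lines, which
	 * have already been prefetched, are handled by looping back
	 * once more with r7 = 0.
	 */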
114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

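	/*
	 * Copy and sum the remaining words, then the trailing halfword
	 * and byte, and fold the final carry into the result.
	 */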
63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8
	adde	r12,r12,r0
66:	addze	r3,r12
	addi	r1,r1,16
	beqlr+	cr7
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

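/*
 * Fault handlers: pop the stack frame, reload the saved error pointer
 * and, if it is non-NULL, store -EFAULT through it.
 */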
/* read fault */
src_error:
	lwz	r7,12(r1)
	addi	r1,r1,16
	cmpwi	cr0,r7,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r7)
	blr
/* write fault */
dst_error:
	lwz	r8,8(r1)
	addi	r1,r1,16
	cmpwi	cr0,r8,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r8)
	blr

	EX_TABLE(70b, src_error);
	EX_TABLE(71b, dst_error);
	EX_TABLE(72b, src_error);
	EX_TABLE(73b, dst_error);
	EX_TABLE(54b, dst_error);

/*
 * These expansions generate the exception-table entries for the
 * cacheline loop above, directing faults to either src_error (a fault
 * in the read part) or dst_error (a fault in the write part).
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, src_error);
	EX_TABLE(31b, dst_error);
	EX_TABLE(40b, src_error);
	EX_TABLE(41b, dst_error);
	EX_TABLE(50b, src_error);
	EX_TABLE(51b, dst_error);

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 */

_GLOBAL(csum_ipv6_magic)
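	/*
	 * r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum.
	 * Sum the eight 32-bit words of the two addresses with carry,
	 * then add in len, proto and the incoming sum.
	 */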
	lwz	r8, 0(r3)
	lwz	r9, 4(r3)
	addc	r0, r7, r8
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0
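	/*
	 * Fold the 32-bit sum into 16 bits (adding the two halfwords
	 * with end-around carry), complement it, and return the result
	 * in the low 16 bits of r3.
	 */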
	rotlwi	r3, r0, 16
	add	r3, r0, r3
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31
	blr
EXPORT_SYMBOL(csum_ipv6_magic)