/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

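/*
 * COPY_16_BYTES copies 16 bytes per iteration through r7-r10, using the
 * update forms (lwzu/stwu) to advance r4 (source) and r6 (destination).
 * COPY_16_BYTES_WITHEX(n) is the same copy with each load and store
 * carrying a numbered label (8n0-8n7) so it can be covered by __ex_table
 * entries; COPY_16_BYTES_EXCODE(n) emits the matching fixup stubs, which
 * adjust r5 for the part of the current cache line already copied and
 * branch to 104f (fault on a load) or 105f (fault on a store), together
 * with the __ex_table entries that point at them.
 */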
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	.section __ex_table,"a";		\
	.align	2;				\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"copy_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 */
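/*
 * memset(r3 = dest, r4 = fill byte, r5 = count), returning dest in r3.
 * The fill byte is replicated into all four bytes of r4; if the fill
 * value is zero and the region covers complete cache lines, those lines
 * are cleared with dcbz rather than with individual stores, and only
 * the leading and trailing partial words/bytes are stored explicitly.
 */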
_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23		/* replicate the fill byte ... */
	rlwimi	r4,r4,16,0,15		/* ... into all 4 bytes of r4 */

	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	cmplwi	0,r4,0
	bne	2f	/* Use normal procedure if r4 is not zero */

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)		/* fill words up to the cache line boundary */
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6			/* zero whole cache lines */
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)		/* store the remaining words */
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)		/* store the remaining bytes */
	bdnz	8b
	blr

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
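/*
 * memmove/memcpy(r3 = dest, r4 = src, r5 = count), returning dest in r3.
 * memmove falls through to memcpy when dest <= src; memcpy checks for
 * overlap and punts to generic_memcpy if the regions do overlap, since
 * dcbz on the destination could clobber source bytes that have not been
 * copied yet.  Otherwise it copies bytes/words up to a cache line
 * boundary, runs the dcbz + COPY_16_BYTES loop one cache line at a
 * time, and finishes the tail with word and byte copies.
 */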
_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL(memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	generic_memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr

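/*
 * generic_memcpy: plain forward copy with no cache tricks, used by
 * memcpy when the regions overlap (so dcbz cannot be used).  It copies
 * two words per iteration once the destination is word-aligned, then
 * handles the remaining word and trailing bytes.
 */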
_GLOBAL(generic_memcpy)
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

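/*
 * backwards_memcpy: copy from the end of the buffers downward, used by
 * memmove when dest > src so that overlapping regions are handled
 * correctly.  The structure mirrors generic_memcpy, but with negative
 * update offsets.
 */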
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

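/*
 * __copy_tofrom_user(r3 = to, r4 = from, r5 = size) follows the same
 * cache-line strategy as memcpy, but every load and store that may
 * touch user space is covered by an __ex_table entry.  On success it
 * returns 0 in r3; on a fault the fixup code works out how many bytes
 * were left uncopied and returns that count instead.  For a fault on a
 * load it first tries to keep copying byte by byte, then zeroes
 * whatever destination bytes remain.
 */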
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

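	/*
	 * r0 = number of complete cache lines, r7 = prefetch distance
	 * chosen above (0, 1 or MAX_COPY_PREFETCH lines).  The loop at
	 * 53 copies r0 - r7 lines while prefetching r7 lines ahead;
	 * the leftover r7 lines are then copied by coming back to 114
	 * with the prefetch distance set to 0, so the dcbt in the loop
	 * does not keep reaching past the end of the source.
	 */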
114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text