// SPDX-License-Identifier: GPL-2.0
/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, 2003 Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup a long time ago.
 */

#include <linux/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 3.2.1 Axis release R53/1.53-v32".

   Check regularly...

   Note that for CRISv32, the PC saved at a bus-fault is the address
   *at* the faulting instruction, with a special case for instructions
   in delay slots: then it's the address of the branch.  Note also that
   in contrast to v10, a postincrement in the instruction is *not*
   performed at a bus-fault; the register is seen having the original
   value in fault handlers.  */
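
/* Note (editorial sketch, not from the original file): all three
   functions below pin their C variables to fixed registers with GCC
   explicit register variables and then let the asm itself verify the
   allocation with .ifnc/.err, along the lines of:

	register int n __asm__ ("r12") = pn;
	__asm__ volatile (".ifnc %0,$r12\n .err\n .endif"
			  : "=r" (n) : "0" (n));

   If the compiler had moved the operand out of r12, the .ifnc string
   comparison would fail and assembly would stop at .err.  */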


/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c".  */

unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc were alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
	{
	  __asm_copy_to_user_1 (dst, src, retn);
	  n--;
	}

      if ((unsigned long) dst & 2)
	{
	  __asm_copy_to_user_2 (dst, src, retn);
	  n -= 2;
	}
    }

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
    {
      /* For large copies we use 'movem'.  */

      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.  */
      __asm__ volatile ("\
	;; Check that the register asm declaration came out right.	\n\
	;; The GCC manual explicitly says TRT will happen.		\n\
	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
	.err								\n\
	.endif								\n\
									\n\
	;; Save the registers we'll use in the movem process		\n\
	;; on the stack.						\n\
	subq	11*4,$sp						\n\
	movem	$r10,[$sp]						\n\
									\n\
	;; Now we've got this:						\n\
	;; r11 - src							\n\
	;; r13 - dst							\n\
	;; r12 - n							\n\
									\n\
	;; Update n for the first loop					\n\
	subq	44,$r12							\n\
0:									\n\
	movem	[$r11+],$r10						\n\
	subq	44,$r12							\n\
1:	bge	0b							\n\
	movem	$r10,[$r13+]						\n\
3:									\n\
	addq	44,$r12 ;; compensate for last loop underflowing n	\n\
									\n\
	;; Restore registers from stack					\n\
	movem	[$sp+],$r10						\n\
2:									\n\
	.section .fixup,\"ax\"						\n\
4:									\n\
; When failing on any of the 1..44 bytes in a chunk, we adjust back the	\n\
; source pointer and just drop through to the by-16 and by-4 loops to	\n\
; get the correct number of failing bytes.  This necessarily means a	\n\
; few extra exceptions, but invalid user pointers shouldn't happen in	\n\
; time-critical code anyway.						\n\
	jump	3b							\n\
	subq	44,$r11							\n\
									\n\
	.previous							\n\
	.section __ex_table,\"a\"					\n\
	.dword	1b,4b							\n\
	.previous"

	/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
	/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

    }

  while (n >= 16)
    {
      __asm_copy_to_user_16 (dst, src, retn);
      n -= 16;
    }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_copy_to_user_4 (dst, src, retn);
      n -= 4;
    }

  switch (n)
    {
    case 0:
      break;
    case 1:
      __asm_copy_to_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_to_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_to_user_3 (dst, src, retn);
      break;
    }

  return retn;
}
EXPORT_SYMBOL(__copy_user);
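
/* Usage sketch (illustrative, not from the original file): __copy_user
   is normally reached through the generic copy_to_user() wrapper, which
   adds the access_ok() range check; the return value is the number of
   bytes that could NOT be copied, so 0 means success.  The helper name
   and struct below are hypothetical:

	static long put_foo(void __user *ubuf, const struct foo *kp)
	{
		if (copy_to_user(ubuf, kp, sizeof(*kp)))
			return -EFAULT;
		return 0;
	}
*/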

/* Copy from user to kernel.  The return value is the number of bytes
   that were inaccessible.  */
unsigned long __copy_user_in(void *pdst, const void __user *psrc,
			     unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc were alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there are no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
    {
      if (((unsigned long) src & 1) && n != 0)
	{
	  __asm_copy_from_user_1 (dst, src, retn);
	  n--;
	  if (retn != 0)
	    goto exception;
	}

      if (((unsigned long) src & 2) && n >= 2)
	{
	  __asm_copy_from_user_2 (dst, src, retn);
	  n -= 2;
	  if (retn != 0)
	    goto exception;
	}
    }

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
    {
      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.  */
      __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
	.err								\n\
	.endif								\n\
									\n\
	;; Save the registers we'll use in the movem process		\n\
	;; on the stack.						\n\
	subq	11*4,$sp						\n\
	movem	$r10,[$sp]						\n\
									\n\
	;; Now we've got this:						\n\
	;; r11 - src							\n\
	;; r13 - dst							\n\
	;; r12 - n							\n\
									\n\
	;; Update n for the first loop					\n\
	subq	44,$r12							\n\
0:									\n\
	movem	[$r11+],$r10						\n\
									\n\
	subq	44,$r12							\n\
	bge	0b							\n\
	movem	$r10,[$r13+]						\n\
									\n\
4:									\n\
	addq	44,$r12 ;; compensate for last loop underflowing n	\n\
									\n\
	;; Restore registers from stack					\n\
	movem	[$sp+],$r10						\n\
	.section .fixup,\"ax\"						\n\
									\n\
;; Do not jump back into the loop if we fail.  For some uses, we get a	\n\
;; page fault somewhere on the line.  Without checking for page limits,	\n\
;; we don't know where, but we need to copy accurately and keep an	\n\
;; accurate count; not just clear the whole line.  To do that, we fall	\n\
;; down in the code below, proceeding with smaller amounts.  It should	\n\
;; be kept in mind that we have to cater to code like what at one time	\n\
;; was in fs/super.c:							\n\
;;  i = size - copy_from_user((void *)page, data, size);		\n\
;; which would cause repeated faults while clearing the remainder of	\n\
;; the SIZE bytes at PAGE after the first fault.			\n\
;; A caveat here is that we must not fall through from a failing page	\n\
;; to a valid page.							\n\
									\n\
3:									\n\
	jump	4b ;; Fall through, pretending the fault didn't happen.	\n\
	nop								\n\
									\n\
	.previous							\n\
	.section __ex_table,\"a\"					\n\
	.dword	0b,3b							\n\
	.previous"

	/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
	/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
    }

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
    {
      __asm_copy_from_user_4 (dst, src, retn);
      n -= 4;

      if (retn)
	goto exception;
    }

  /* If we get here, there were no memory read faults.  */
  switch (n)
    {
    /* These copies are at least "naturally aligned" (so we don't have
       to check each byte), due to the src alignment code before the
       movem loop.  The *_3 case *will* get the correct count for retn.  */
    case 0:
      /* This case deliberately left in (if you have doubts check the
	 generated assembly code).  */
      break;
    case 1:
      __asm_copy_from_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_from_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_from_user_3 (dst, src, retn);
      break;
    }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

exception:
  return retn + n;
}
EXPORT_SYMBOL(__copy_user_in);
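
/* Usage sketch (illustrative, not from the original file): the return
   value counts the bytes that were NOT copied, which lets callers use
   the "size - n" idiom mentioned in the fixup comment above.  The
   helper name below is hypothetical:

	static ssize_t read_foo(void *kbuf, const void __user *ubuf,
				size_t size)
	{
		size_t left = copy_from_user(kbuf, ubuf, size);

		return size - left;	 left == 0 means full success
	}
*/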

/* Zero userspace.  */
unsigned long __do_clear_user(void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc were alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
	{
	  __asm_clear_1 (dst, retn);
	  n--;
	}

      if ((unsigned long) dst & 2)
	{
	  __asm_clear_2 (dst, retn);
	  n -= 2;
	}
    }

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
  if (n >= 48)
    {
      /* For large clears we use 'movem'.  */

      /* It is not optimal to tell the compiler about clobbering any
	 call-saved registers; that will move the saving/restoring of
	 those registers to the function prologue/epilogue, and make
	 non-movem sizes suboptimal.

	 This method is not foolproof; it assumes that the "asm reg"
	 declarations at the beginning of the function really are used
	 here (beware: they may be moved to temporary registers).
	 This way, we do not have to save/move the registers around into
	 temporaries; we can safely use them straight away.

	 If you want to check that the allocation was right, then
	 check the equalities in the first comment.  It should say
	 something like "r13=r13, r11=r11, r12=r12".  */
      __asm__ volatile ("\
	.ifnc %0%1%2,$r13$r12$r10					\n\
	.err								\n\
	.endif								\n\
									\n\
	;; Save the registers we'll clobber in the movem process	\n\
	;; on the stack.  Don't mention them to gcc, it will only be	\n\
	;; upset.							\n\
	subq	11*4,$sp						\n\
	movem	$r10,[$sp]						\n\
									\n\
	clear.d	$r0							\n\
	clear.d	$r1							\n\
	clear.d	$r2							\n\
	clear.d	$r3							\n\
	clear.d	$r4							\n\
	clear.d	$r5							\n\
	clear.d	$r6							\n\
	clear.d	$r7							\n\
	clear.d	$r8							\n\
	clear.d	$r9							\n\
	clear.d	$r10							\n\
	clear.d	$r11							\n\
									\n\
	;; Now we've got this:						\n\
	;; r13 - dst							\n\
	;; r12 - n							\n\
									\n\
	;; Update n for the first loop					\n\
	subq	12*4,$r12						\n\
0:									\n\
	subq	12*4,$r12						\n\
1:									\n\
	bge	0b							\n\
	movem	$r11,[$r13+]						\n\
									\n\
	addq	12*4,$r12 ;; compensate for last loop underflowing n	\n\
									\n\
	;; Restore registers from stack					\n\
	movem	[$sp+],$r10						\n\
2:									\n\
	.section .fixup,\"ax\"						\n\
3:									\n\
	movem	[$sp],$r10						\n\
	addq	12*4,$r10						\n\
	addq	12*4,$r13						\n\
	movem	$r10,[$sp]						\n\
	jump	0b							\n\
	clear.d	$r10							\n\
									\n\
	.previous							\n\
	.section __ex_table,\"a\"					\n\
	.dword	1b,3b							\n\
	.previous"

	/* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
	/* Inputs */ : "0" (dst), "1" (n), "2" (retn)
	/* Clobber */ : "r11");
    }

  while (n >= 16)
    {
      __asm_clear_16 (dst, retn);
      n -= 16;
    }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_clear_4 (dst, retn);
      n -= 4;
    }

  switch (n)
    {
    case 0:
      break;
    case 1:
      __asm_clear_1 (dst, retn);
      break;
    case 2:
      __asm_clear_2 (dst, retn);
      break;
    case 3:
      __asm_clear_3 (dst, retn);
      break;
    }

  return retn;
}
EXPORT_SYMBOL(__do_clear_user);
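
/* Usage sketch (illustrative, not from the original file):
   __do_clear_user is normally reached through the clear_user() wrapper,
   which adds the access_ok() check; as above, the return value is the
   number of bytes left unzeroed, so 0 means success.  The helper name
   below is hypothetical:

	static long wipe_user_buf(void __user *ubuf, size_t size)
	{
		return clear_user(ubuf, size) ? -EFAULT : 0;
	}
*/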