/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, 2003 Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
 */

#include <linux/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 3.2.1 Axis release R53/1.53-v32".

   Check regularly...

   Note that for CRISv32, the PC saved at a bus-fault is the address
   *at* the faulting instruction, with a special case for instructions
   in delay slots: then it's the address of the branch.  Note also that
   in contrast to v10, a postincrement in the instruction is *not*
   performed at a bus-fault; the register is seen having the original
   value in fault handlers.  */

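/* A note on the fixups below: each ".dword A,B" emitted into the
   __ex_table section pairs the address A of an instruction that may
   fault with the address B of its fixup code; on a fault the exception
   handler looks the faulting PC up in this table and resumes at the
   fixup.  Because of the delay-slot rule above, entries for stores
   sitting in a delay slot name the branch label, not the store
   itself.  */
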
/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c".  */

unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc were alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
	{
	  __asm_copy_to_user_1 (dst, src, retn);
	  n--;
	}

      if ((unsigned long) dst & 2)
	{
	  __asm_copy_to_user_2 (dst, src, retn);
	  n -= 2;
	}
    }
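
  /* Alignment sketch: for dst ending in ...01 binary with n >= 3, the
     one-byte copy leaves dst ending in ...10 and the two-byte copy then
     word-aligns it; the n >= 3 guard above is what keeps these
     adjustments from driving n negative.  */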

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
    {
      /* For large copies we use 'movem'.  */

      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.  */
      __asm__ volatile ("\
	;; Check that the register asm declaration got right.		\n\
	;; The GCC manual explicitly says TRT will happen.		\n\
	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
	.err								\n\
	.endif								\n\
									\n\
	;; Save the registers we'll use in the movem process		\n\
	;; on the stack.						\n\
	subq	11*4,$sp						\n\
	movem	$r10,[$sp]						\n\
									\n\
	;; Now we've got this:						\n\
	;; r11 - src							\n\
	;; r13 - dst							\n\
	;; r12 - n							\n\
									\n\
	;; Update n for the first loop					\n\
	subq	44,$r12							\n\
0:									\n\
	movem	[$r11+],$r10						\n\
	subq	44,$r12							\n\
1:	bge	0b							\n\
	movem	$r10,[$r13+]						\n\
3:									\n\
	addq	44,$r12	;; compensate for last loop underflowing n	\n\
									\n\
	;; Restore registers from stack					\n\
	movem	[$sp+],$r10						\n\
2:									\n\
	.section .fixup,\"ax\"						\n\
4:									\n\
; When failing on any of the 1..44 bytes in a chunk, we adjust back the	\n\
; source pointer and just drop through to the by-16 and by-4 loops to	\n\
; get the correct number of failing bytes.  This necessarily means a	\n\
; few extra exceptions, but invalid user pointers shouldn't happen in	\n\
; time-critical code anyway.						\n\
	jump	3b							\n\
	subq	44,$r11							\n\
									\n\
	.previous							\n\
	.section __ex_table,\"a\"					\n\
	.dword	1b,4b							\n\
	.previous"

	/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
	/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

    }
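
  /* Loop accounting sketch (non-fault path): n is pre-decremented by 44
     before the loop and by 44 per iteration, with the store sitting in
     the bge delay slot, so e.g. n == 100 runs two 44-byte movem rounds
     and the final addq leaves n == 12 for the loops below.  The
     __ex_table entry names label 1 (the bge) because, per the note at
     the top of the file, a fault in the delay-slot store is reported
     with the branch address; the fixup at 4 rewinds src by one chunk,
     since the faulting chunk's load had already post-incremented r11.  */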

  while (n >= 16)
    {
      __asm_copy_to_user_16 (dst, src, retn);
      n -= 16;
    }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_copy_to_user_4 (dst, src, retn);
      n -= 4;
    }

  switch (n)
    {
    case 0:
      break;
    case 1:
      __asm_copy_to_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_to_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_to_user_3 (dst, src, retn);
      break;
    }

  return retn;
}
EXPORT_SYMBOL(__copy_user);

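/* Usage sketch (illustrative, not part of this file): callers treat a
   nonzero return as the number of bytes that could not be copied, so a
   typical wrapper built on __copy_user checks access_ok() on the user
   pointer and then does something like

     if (copy_to_user(ubuf, kbuf, len))
	     return -EFAULT;
 */
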
/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return-value is the number of bytes that were
   inaccessible.  */
unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
				  unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc were alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there are no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
    {
      if (((unsigned long) src & 1) && n != 0)
	{
	  __asm_copy_from_user_1 (dst, src, retn);
	  n--;
	}

      if (((unsigned long) src & 2) && n >= 2)
	{
	  __asm_copy_from_user_2 (dst, src, retn);
	  n -= 2;
	}

      /* We only need one check after the unalignment-adjustments, because
	 if both adjustments were done, either both or neither reference
	 had an exception.  */
      if (retn != 0)
	goto copy_exception_bytes;
    }
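
  /* Why a single check suffices: both adjustments are only done when src
     ends in ...01 binary, and then the one-byte and two-byte reads fall
     within the same aligned four-byte word, hence the same page; they
     fault together or not at all.  */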

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
    {
      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.  */
      __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
	.err								\n\
	.endif								\n\
									\n\
	;; Save the registers we'll use in the movem process		\n\
	;; on the stack.						\n\
	subq	11*4,$sp						\n\
	movem	$r10,[$sp]						\n\
									\n\
	;; Now we've got this:						\n\
	;; r11 - src							\n\
	;; r13 - dst							\n\
	;; r12 - n							\n\
									\n\
	;; Update n for the first loop					\n\
	subq	44,$r12							\n\
0:									\n\
	movem	[$r11+],$r10						\n\
									\n\
	subq	44,$r12							\n\
	bge	0b							\n\
	movem	$r10,[$r13+]						\n\
									\n\
4:									\n\
	addq	44,$r12	;; compensate for last loop underflowing n	\n\
									\n\
	;; Restore registers from stack					\n\
	movem	[$sp+],$r10						\n\
	.section .fixup,\"ax\"						\n\
									\n\
;; Do not jump back into the loop if we fail.  For some uses, we get a	\n\
;; page fault somewhere on the line.  Without checking for page limits,	\n\
;; we don't know where, but we need to copy accurately and keep an	\n\
;; accurate count; not just clear the whole line.  To do that, we fall	\n\
;; down in the code below, proceeding with smaller amounts.  It should	\n\
;; be kept in mind that we have to cater to code like what at one time	\n\
;; was in fs/super.c:							\n\
;;  i = size - copy_from_user((void *)page, data, size);		\n\
;; which would cause repeated faults while clearing the remainder of	\n\
;; the SIZE bytes at PAGE after the first fault.			\n\
;; A caveat here is that we must not fall through from a failing page	\n\
;; to a valid page.							\n\
									\n\
3:									\n\
	jump	4b	;; Fall through, pretending the fault didn't happen.	\n\
	nop								\n\
									\n\
	.previous							\n\
	.section __ex_table,\"a\"					\n\
	.dword	0b,3b							\n\
	.previous"

	/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
	/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
    }
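
  /* Fault-path sketch: the __ex_table entry maps the movem load at label
     0 to the fixup at label 3, which just jumps past the loop to label 4.
     Per the note at the top of the file, the post-increment of r11 is not
     performed on a bus fault, so src still points at the failing chunk
     and the by-four loop below re-walks it, counting and zeroing the
     inaccessible bytes.  */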

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
    {
      __asm_copy_from_user_4 (dst, src, retn);
      n -= 4;

      if (retn)
	goto copy_exception_bytes;
    }

  /* If we get here, there were no memory read faults.  */
  switch (n)
    {
      /* These copies are at least "naturally aligned" (so we don't have
	 to check each byte), due to the src alignment code before the
	 movem loop.  The *_3 case *will* get the correct count for retn.  */
    case 0:
      /* This case deliberately left in (if you have doubts check the
	 generated assembly code).  */
      break;
    case 1:
      __asm_copy_from_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_from_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_from_user_3 (dst, src, retn);
      break;
    }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

copy_exception_bytes:
  /* We already have "retn" bytes cleared, and need to clear the
     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
     memset is preferred here, since this isn't speed-critical code and
     we'd rather have this a leaf-function than calling memset.  */
  {
    char *endp;
    for (endp = dst + n; dst < endp; dst++)
      *dst = 0;
  }
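
  /* Example outcome: asked for 100 bytes of which only the first 60 are
     accessible, the function copies 60 bytes, zeroes the trailing 40
     bytes of dst (partly in the fixups, partly in the loop above), and
     returns 40.  */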

  return retn + n;
}
EXPORT_SYMBOL(__copy_user_zeroing);

/* Zero userspace.  */
unsigned long __do_clear_user(void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc were alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
	{
	  __asm_clear_1 (dst, retn);
	  n--;
	}

      if ((unsigned long) dst & 2)
	{
	  __asm_clear_2 (dst, retn);
	  n -= 2;
	}
    }

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
  if (n >= 48)
    {
      /* For large clears we use 'movem'.  */

      /* It is not optimal to tell the compiler about clobbering any
	 call-saved registers; that will move the saving/restoring of
	 those registers to the function prologue/epilogue, and make
	 non-movem sizes suboptimal.

	 This method is not foolproof; it assumes that the "asm reg"
	 declarations at the beginning of the function really are used
	 here (beware: they may be moved to temporary registers).
	 This way, we do not have to save/move the registers around into
	 temporaries; we can safely use them straight away.

	 If you want to check that the allocation was right, then
	 check the equalities in the first comment.  It should say
	 something like "r13=r13, r11=r11, r12=r12".  */
      __asm__ volatile ("\
	.ifnc %0%1%2,$r13$r12$r10					\n\
	.err								\n\
	.endif								\n\
									\n\
	;; Save the registers we'll clobber in the movem process	\n\
	;; on the stack.  Don't mention them to gcc, it will only be	\n\
	;; upset.							\n\
	subq	11*4,$sp						\n\
	movem	$r10,[$sp]						\n\
									\n\
	clear.d	$r0							\n\
	clear.d	$r1							\n\
	clear.d	$r2							\n\
	clear.d	$r3							\n\
	clear.d	$r4							\n\
	clear.d	$r5							\n\
	clear.d	$r6							\n\
	clear.d	$r7							\n\
	clear.d	$r8							\n\
	clear.d	$r9							\n\
	clear.d	$r10							\n\
	clear.d	$r11							\n\
									\n\
	;; Now we've got this:						\n\
	;; r13 - dst							\n\
	;; r12 - n							\n\
									\n\
	;; Update n for the first loop					\n\
	subq	12*4,$r12						\n\
0:									\n\
	subq	12*4,$r12						\n\
1:									\n\
	bge	0b							\n\
	movem	$r11,[$r13+]						\n\
									\n\
	addq	12*4,$r12 ;; compensate for last loop underflowing n	\n\
									\n\
	;; Restore registers from stack					\n\
	movem	[$sp+],$r10						\n\
2:									\n\
	.section .fixup,\"ax\"						\n\
3:									\n\
	movem	[$sp],$r10						\n\
	addq	12*4,$r10						\n\
	addq	12*4,$r13						\n\
	movem	$r10,[$sp]						\n\
	jump	0b							\n\
	clear.d	$r10							\n\
									\n\
	.previous							\n\
	.section __ex_table,\"a\"					\n\
	.dword	1b,3b							\n\
	.previous"

	/* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
	/* Inputs */ : "0" (dst), "1" (n), "2" (retn)
	/* Clobber */ : "r11");
    }
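
  /* Size sketch: each "movem $r11,[$r13+]" stores r0..r11, i.e. 12 words
     or 48 bytes per iteration, which matches both the n >= 48 threshold
     and the 12*4 bookkeeping above.  */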

  while (n >= 16)
    {
      __asm_clear_16 (dst, retn);
      n -= 16;
    }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_clear_4 (dst, retn);
      n -= 4;
    }

  switch (n)
    {
    case 0:
      break;
    case 1:
      __asm_clear_1 (dst, retn);
      break;
    case 2:
      __asm_clear_2 (dst, retn);
      break;
    case 3:
      __asm_clear_3 (dst, retn);
      break;
    }

  return retn;
}
EXPORT_SYMBOL(__do_clear_user);