/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(100b, 103b)
	_ASM_EXTABLE_UA(101b, 103b)
	.endm

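/*
 * Worked example for ALIGN_DESTINATION: if the destination ends in 0x3
 * (dest % 8 == 3), %ecx becomes 8 - 3 = 5, so five bytes are copied one
 * at a time up to the next 8-byte boundary and the remaining count in
 * %edx shrinks by five before the aligned main copy starts.
 */
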
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
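/*
 * Illustrative C-style sketch of the flow below (fault handling
 * omitted; not a definitive equivalent):
 *
 *	while (count >= 64) {	// unrolled: 8 quadwords per pass
 *		copy64(dst, src); src += 64; dst += 64; count -= 64;
 *	}
 *	while (count >= 8) {	// .L_copy_short_string
 *		copy8(dst, src);  src += 8;  dst += 8;  count -= 8;
 *	}
 *	while (count--)		// byte tail
 *		*dst++ = *src++;
 *	return 0;		// %eax = uncopied bytes, 0 here
 *
 * copy64()/copy8() are hypothetical helpers standing in for the
 * labelled movq sequences; a fault in any of them lands in the fixups
 * below, which compute the uncopied byte count and jump to
 * copy_user_handle_tail.
 */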
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx			/* ecx: remaining 64-byte chunks -> bytes */
	addl %ecx,%edx			/* edx = total bytes not yet copied */
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx		/* edx += 8 * remaining quadwords */
	jmp 60f
50:	movl %ecx,%edx			/* remaining bytes */
60:	jmp copy_user_handle_tail	/* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_UA(1b, 30b)
	_ASM_EXTABLE_UA(2b, 30b)
	_ASM_EXTABLE_UA(3b, 30b)
	_ASM_EXTABLE_UA(4b, 30b)
	_ASM_EXTABLE_UA(5b, 30b)
	_ASM_EXTABLE_UA(6b, 30b)
	_ASM_EXTABLE_UA(7b, 30b)
	_ASM_EXTABLE_UA(8b, 30b)
	_ASM_EXTABLE_UA(9b, 30b)
	_ASM_EXTABLE_UA(10b, 30b)
	_ASM_EXTABLE_UA(11b, 30b)
	_ASM_EXTABLE_UA(12b, 30b)
	_ASM_EXTABLE_UA(13b, 30b)
	_ASM_EXTABLE_UA(14b, 30b)
	_ASM_EXTABLE_UA(15b, 30b)
	_ASM_EXTABLE_UA(16b, 30b)
	_ASM_EXTABLE_UA(18b, 40b)
	_ASM_EXTABLE_UA(19b, 40b)
	_ASM_EXTABLE_UA(21b, 50b)
	_ASM_EXTABLE_UA(22b, 50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Going beyond that would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB; anyone who wants to lift this limit
 * needs to take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
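/*
 * Illustrative sketch (fault handling omitted): after alignment the
 * fast path below is essentially
 *
 *	rcx = count / 8; rep movsq;	// whole quadwords
 *	rcx = count % 8; rep movsb;	// trailing bytes
 *	return 0;
 */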
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx		/* ecx = byte remainder + 8 * remaining quadwords */
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 11b)
	_ASM_EXTABLE_UA(3b, 12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use this capability when it is available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
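/*
 * Illustrative sketch: with enhanced REP MOVSB a single 'rep movsb'
 * handles any count, so the dispatch below is roughly
 *
 *	if (count < 64)
 *		goto copy_short_string;	// 'rep' startup cost dominates
 *	rcx = count; rep movsb;
 *	return 0;
 *
 * where copy_short_string stands for the shared .L_copy_short_string
 * path in copy_user_generic_unrolled above.
 */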
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal
 * situation, it is not necessary to optimize tail handling.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
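/*
 * Illustrative sketch: the tail handler retries the remaining %edx
 * bytes with a plain 'rep movsb'. If that faults again, %ecx holds the
 * still-uncopied byte count, which becomes the return value in %eax
 * (so callers see how much was left; 0 on full success).
 */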
ALIGN;
copy_user_handle_tail:
	movl %edx,%ecx
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	ret

	_ASM_EXTABLE_UA(1b, 2b)
END(copy_user_handle_tail)

/*
 * copy_user_nocache - Uncached memory copy with exception handling.
 * This keeps the destination data out of the cache for better
 * performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
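/*
 * Illustrative sketch of the structure below (fault handling omitted):
 *
 *	while (count >= 64)	4x8-byte movnti loop
 *	while (count >= 8)	8-byte movnti loop
 *	if (count >= 4 && dest is 4-byte aligned)
 *		one 4-byte movnti
 *	while (count)		byte loop with regular, cached stores
 *	sfence; return 0;	// fence orders the non-temporal stores
 */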
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes are left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes are left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx			/* ecx: remaining 64-byte chunks -> bytes */
	addl %ecx,%edx			/* edx = total bytes not yet copied */
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx		/* rdx += 8 * remaining quadwords */
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx		/* rdx += 4 * remaining dwords */
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx			/* remaining bytes */
.L_fixup_handle_tail:
	sfence				/* order the movnti stores before the tail copy */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)