/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

.macro ALIGN_DESTINATION
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jz 102f                         /* already aligned */
        subl $8,%ecx
        negl %ecx
        subl %ecx,%edx
100:    movb (%rsi),%al
101:    movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 100b
102:
        .section .fixup,"ax"
103:    addl %ecx,%edx                  /* ecx is zerorest also */
        jmp .Lcopy_user_handle_tail
        .previous

        _ASM_EXTABLE_CPY(100b, 103b)
        _ASM_EXTABLE_CPY(101b, 103b)
        .endm

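/*
 * ALIGN_DESTINATION, worked through (illustration only): with %rdi ending
 * in ...5 and %rdx = 100, %ecx becomes 5, then 8 - 5 = 3, so three bytes
 * are copied one at a time and %rdx drops to 97 before the caller's
 * aligned main loop runs.  A fault in that byte loop goes to the common
 * tail handler via label 103.
 */
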
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
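/*
 * C-side view (a sketch for reference; the declaration lives in
 * asm/uaccess_64.h, not in this file):
 *
 *      __must_check unsigned long
 *      copy_user_generic_unrolled(void *to, const void *from, unsigned len);
 *
 * The return value is the number of bytes that could not be copied.
 */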
SYM_FUNC_START(copy_user_generic_unrolled)
        ASM_STAC
        cmpl $8,%edx
        jb 20f                          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz .L_copy_short_string
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
.L_copy_short_string:
        movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
        /* fault in the 64-byte loop: bytes left = 64*%ecx + %edx */
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
        /* fault in the 8-byte loop: bytes left = 8*%ecx + %edx */
40:     leal (%rdx,%rcx,8),%edx
        jmp 60f
        /* fault in the byte loop: bytes left = %ecx */
50:     movl %ecx,%edx
60:     jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
        .previous

        _ASM_EXTABLE_CPY(1b, 30b)
        _ASM_EXTABLE_CPY(2b, 30b)
        _ASM_EXTABLE_CPY(3b, 30b)
        _ASM_EXTABLE_CPY(4b, 30b)
        _ASM_EXTABLE_CPY(5b, 30b)
        _ASM_EXTABLE_CPY(6b, 30b)
        _ASM_EXTABLE_CPY(7b, 30b)
        _ASM_EXTABLE_CPY(8b, 30b)
        _ASM_EXTABLE_CPY(9b, 30b)
        _ASM_EXTABLE_CPY(10b, 30b)
        _ASM_EXTABLE_CPY(11b, 30b)
        _ASM_EXTABLE_CPY(12b, 30b)
        _ASM_EXTABLE_CPY(13b, 30b)
        _ASM_EXTABLE_CPY(14b, 30b)
        _ASM_EXTABLE_CPY(15b, 30b)
        _ASM_EXTABLE_CPY(16b, 30b)
        _ASM_EXTABLE_CPY(18b, 40b)
        _ASM_EXTABLE_CPY(19b, 40b)
        _ASM_EXTABLE_CPY(21b, 50b)
        _ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only copies of up to 4GB are supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Larger copies would also be dangerous because both Intel and AMD have
 * errata for rep movsq with counts above 4GB; keep that in mind if you
 * ever feel the need to lift this limit.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
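/*
 * Illustration (not from the original comment): a 20-byte copy to an
 * aligned destination runs rep movsq with %ecx = 20 >> 3 = 2 quadwords
 * and then rep movsb with the %edx = 20 & 7 = 4 remaining bytes.
 */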
SYM_FUNC_START(copy_user_generic_string)
        ASM_STAC
        cmpl $8,%edx
        jb 2f                           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
        /* fault during rep movsq: bytes left = 8*%rcx + %rdx */
11:     leal (%rdx,%rcx,8),%ecx
        /* fault during rep movsb: bytes left = %ecx */
12:     movl %ecx,%edx                  /* ecx is zerorest also */
        jmp .Lcopy_user_handle_tail
        .previous

        _ASM_EXTABLE_CPY(1b, 11b)
        _ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs provide enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
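/*
 * How a variant gets picked (a sketch of the expected caller, defined in
 * asm/uaccess_64.h rather than here): copy_user_generic() dispatches with
 * alternative_call_2(), using copy_user_enhanced_fast_string on
 * X86_FEATURE_ERMS, copy_user_generic_string on X86_FEATURE_REP_GOOD, and
 * copy_user_generic_unrolled otherwise.
 */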
SYM_FUNC_START(copy_user_enhanced_fast_string)
        ASM_STAC
        cmpl $64,%edx
        jb .L_copy_short_string         /* less than 64 bytes, avoid the costly 'rep' */
        movl %edx,%ecx
1:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
12:     movl %ecx,%edx                  /* ecx is zerorest also */
        jmp .Lcopy_user_handle_tail
        .previous

        _ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
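/*
 * Background note: the _ASM_EXTABLE_CPY fixup path is expected to leave
 * the trap number in %eax before landing here, which is what makes the
 * X86_TRAP_MC comparison below work without extra bookkeeping.
 */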
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
        movl %edx,%ecx
        cmp $X86_TRAP_MC,%eax           /* check if X86_TRAP_MC */
        je 3f
1:      rep movsb
2:      mov %ecx,%eax
        ASM_CLAC
        ret

        /*
         * Return zero to pretend that this copy succeeded. This
         * is counter-intuitive, but needed to prevent the code
         * in lib/iov_iter.c from retrying and running back into
         * the poison cache line again. The machine check handler
         * will ensure that a SIGBUS is sent to the task.
         */
3:      xorl %eax,%eax
        ASM_CLAC
        ret

        _ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)

/*
 * copy_user_nocache - Uncached memory copy with exception handling.
 * This keeps the destination out of the cache for better performance.
 *
 * Note: A cached memory copy is used when the destination or size is not
 * naturally aligned. That is:
 *  - 8-byte alignment is required when the size is 8 bytes or larger.
 *  - 4-byte alignment is required when the size is 4 bytes.
 */
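/*
 * Worked example (illustration only): a 100-byte copy to an 8-byte
 * aligned destination does one 64-byte non-temporal block, four 8-byte
 * movnti stores and one 4-byte movnti store, so the whole transfer
 * bypasses the cache; a trailing 1-3 bytes would instead use the cached
 * byte loop below.
 */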
SYM_FUNC_START(__copy_user_nocache)
        ASM_STAC

        /* If size is less than 8 bytes, go to 4-byte copy */
        cmpl $8,%edx
        jb .L_4b_nocache_copy_entry

        /* If destination is not 8-byte aligned, "cache" copy to align it */
        ALIGN_DESTINATION

        /* Set 4x8-byte copy count and remainder */
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz .L_8b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movnti %r8,(%rdi)
6:      movnti %r9,1*8(%rdi)
7:      movnti %r10,2*8(%rdi)
8:      movnti %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movnti %r8,4*8(%rdi)
14:     movnti %r9,5*8(%rdi)
15:     movnti %r10,6*8(%rdi)
16:     movnti %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz .L_4x8b_nocache_copy_loop

        /* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
        movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz .L_4b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:     movq (%rsi),%r8
21:     movnti %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz .L_8b_nocache_copy_loop

        /* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
        andl %edx,%edx
        jz .L_finish_copy

        /* If destination is not 4-byte aligned, go to byte copy: */
        movl %edi,%ecx
        andl $3,%ecx
        jnz .L_1b_cache_copy_entry

        /* Set 4-byte copy count (1 or 0) and remainder */
        movl %edx,%ecx
        andl $3,%edx
        shrl $2,%ecx
        jz .L_1b_cache_copy_entry       /* jump if count is 0 */

        /* Perform 4-byte nocache copy: */
30:     movl (%rsi),%r8d
31:     movnti %r8d,(%rdi)
        leaq 4(%rsi),%rsi
        leaq 4(%rdi),%rdi

        /* If no bytes left, we're done: */
        andl %edx,%edx
        jz .L_finish_copy

        /* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
        movl %edx,%ecx
.L_1b_cache_copy_loop:
40:     movb (%rsi),%al
41:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_1b_cache_copy_loop

        /* Finished copying; fence the prior stores */
.L_finish_copy:
        xorl %eax,%eax
        ASM_CLAC
        sfence
        ret

        .section .fixup,"ax"
.L_fixup_4x8b_copy:
        shll $6,%ecx
        addl %ecx,%edx
        jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
        lea (%rdx,%rcx,8),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
        lea (%rdx,%rcx,4),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
        movl %ecx,%edx
.L_fixup_handle_tail:
        sfence
        jmp .Lcopy_user_handle_tail
        .previous

        _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
        _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
        _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
        _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
        _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
        _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
        _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)