/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

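/*
 * Note: callers normally do not invoke one of the copy_user_generic_*
 * variants below directly; copy_user_generic() (see
 * arch/x86/include/asm/uaccess_64.h) is expected to select among them
 * at runtime via alternatives, keyed on X86_FEATURE_REP_GOOD and
 * X86_FEATURE_ERMS.
 */
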
.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(100b, 103b)
	_ASM_EXTABLE_UA(101b, 103b)
	.endm
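
/*
 * Example: if %rdi ends in ...5, the macro above sets %ecx = 8 - 5 = 3,
 * byte-copies three bytes (subtracting them from %edx), and falls
 * through with an 8-byte-aligned destination.
 */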
42
1da177e4 43/*
3022d734 44 * copy_user_generic_unrolled - memory copy with exception handling.
ad2fc2cd
VM
45 * This version is for CPUs like P4 that don't have efficient micro
46 * code for rep movsq
47 *
48 * Input:
1da177e4
LT
49 * rdi destination
50 * rsi source
51 * rdx count
52 *
ad2fc2cd 53 * Output:
0d2eb44f 54 * eax uncopied bytes or 0 if successful.
1da177e4 55 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

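/*
 * Exception fixups: each target recomputes the number of bytes that
 * were not copied and hands it to copy_user_handle_tail in %edx.
 * 30: fault in the 64-byte loop; %ecx counts remaining 64-byte chunks,
 *     so the remainder is 64*%ecx + %edx.
 * 40: fault in the 8-byte loop; remainder is 8*%ecx + %edx.
 * 50: fault in the byte loop; remainder is simply %ecx.
 */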
	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_UA(1b, 30b)
	_ASM_EXTABLE_UA(2b, 30b)
	_ASM_EXTABLE_UA(3b, 30b)
	_ASM_EXTABLE_UA(4b, 30b)
	_ASM_EXTABLE_UA(5b, 30b)
	_ASM_EXTABLE_UA(6b, 30b)
	_ASM_EXTABLE_UA(7b, 30b)
	_ASM_EXTABLE_UA(8b, 30b)
	_ASM_EXTABLE_UA(9b, 30b)
	_ASM_EXTABLE_UA(10b, 30b)
	_ASM_EXTABLE_UA(11b, 30b)
	_ASM_EXTABLE_UA(12b, 30b)
	_ASM_EXTABLE_UA(13b, 30b)
	_ASM_EXTABLE_UA(14b, 30b)
	_ASM_EXTABLE_UA(15b, 30b)
	_ASM_EXTABLE_UA(16b, 30b)
	_ASM_EXTABLE_UA(18b, 40b)
	_ASM_EXTABLE_UA(19b, 40b)
	_ASM_EXTABLE_UA(21b, 50b)
	_ASM_EXTABLE_UA(22b, 50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please consider those errata first.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
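/*
 * Example of the count split used below: for a 23-byte copy, %ecx ends
 * up as 23 >> 3 = 2, so two quadwords go through rep movsq, and the
 * remaining 23 & 7 = 7 bytes go through rep movsb.
 */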
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

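/*
 * Fixup: a fault in rep movsq leaves the number of quadwords still to
 * copy in %ecx, so 8*%ecx plus the byte remainder in %edx is the count
 * handed to copy_user_handle_tail; a fault in rep movsb already leaves
 * the remaining byte count in %ecx.
 */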
	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 11b)
	_ASM_EXTABLE_UA(3b, 12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
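/*
 * Note: for copies shorter than 64 bytes this entry point reuses
 * .L_copy_short_string from copy_user_generic_unrolled above, since a
 * short rep movsb does not amortize its startup cost.
 */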
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ALIGN;
copy_user_handle_tail:
	movl %edx,%ecx
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	ret

	_ASM_EXTABLE_UA(1b, 2b)
END(copy_user_handle_tail)

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
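/*
 * For example, a copy shorter than 4 bytes, or a 4-byte copy to a
 * destination that is not 4-byte aligned, is performed with ordinary
 * cached stores (the byte loop below) instead of movnti.
 */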
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

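/*
 * Exception fixups: recompute the remaining byte count in %rdx from
 * whichever loop faulted (64*%ecx, 8*%ecx, 4*%ecx or %ecx bytes, plus
 * the remainder already in %edx where applicable), fence the earlier
 * non-temporal stores, and let copy_user_handle_tail finish up.
 */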
	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)