/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
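	/* ecx = number of 64-byte chunks, edx = trailing bytes (0..63) */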
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
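	/* Tail: copy the remaining 0..63 bytes, 8 bytes at a time first */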
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
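	/* Byte-copy whatever is left (0..7 bytes) */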
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

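	/*
	 * Exception fixups: work out how many bytes were NOT copied and
	 * hand off to copy_user_handle_tail, which byte-copies the rest
	 * and returns the final uncopied count in eax.
	 *  30: fault in the 64-byte loop -> remaining = ecx*64 + edx
	 *  40: fault in the 8-byte loop  -> remaining = edx + ecx*8
	 *  50: fault in the byte loop    -> remaining = ecx
	 */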
	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

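	/* Map every faultable load/store above to its fixup entry point */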
	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/*
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

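	/*
	 * Fixups: 11: fault in rep movsq -> remaining = edx + rcx*8
	 *         12: fault in rep movsb -> remaining = ecx
	 * Either way, execution continues at copy_user_handle_tail.
	 */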
	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

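/*
 * Illustrative sketch only, not part of the original file: all three
 * variants above use the same convention (rdi = destination, rsi =
 * source, rdx = count) and return the number of uncopied bytes in eax.
 * The prototype and caller below are assumptions for illustration:
 *
 *	unsigned long copy_user_enhanced_fast_string(void *to,
 *						     const void *from,
 *						     unsigned len);
 *
 *	if (copy_user_enhanced_fast_string(dst, src, len))
 *		return -EFAULT;		// non-zero: fault hit mid-copy
 *
 * Callers are expected to pick one variant (or go through a C wrapper
 * that does so based on CPU features such as X86_FEATURE_ERMS).
 */
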
/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force the destination out of the cache for better performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes are left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes are left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

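	/*
	 * Exception fixups: rebuild the remaining byte count from the loop
	 * counters, fence any pending non-temporal (movnti) stores, then let
	 * copy_user_handle_tail finish up and return the uncopied count.
	 */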
	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)