/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi	source
 * rsi	destination
 * edx	len (32bit)
 * ecx	sum (32bit)
 * r8	src_err_ptr (int)
 * r9	dst_err_ptr (int)
 *
 * Output
 * eax	64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
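
/*
 * Illustrative only: given the register assignment above, a C caller
 * sees roughly the prototype below. The wrapper sketch is hypothetical
 * (the real wrappers, see arch/x86/lib/csum-wrappers_64.c, also handle
 * alignment); only the error-pointer protocol is taken from this file.
 *
 *	__wsum csum_partial_copy_generic(const void *src, void *dst,
 *					 int len, __wsum sum,
 *					 int *src_err_ptr, int *dst_err_ptr);
 *
 *	int src_err = 0, dst_err = 0;
 *	__wsum sum = csum_partial_copy_generic(src, dst, len, init_sum,
 *					       &src_err, &dst_err);
 *	if (src_err || dst_err) {
 *		memset(dst, 0, len);	// destination must be zeroed
 *		sum = 0;		// returned sum is undefined on fault
 *	}
 */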

	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lbad_source)
	.endm

	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lbad_dest)
	.endm

	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	.macro ignore L=.Lignore
30:
	_ASM_EXTABLE(30b, \L)
	.endm


ENTRY(csum_partial_copy_generic)
	cmpl	$3*64, %edx
	jle	.Lignore

.Lignore:
	subq	$7*8, %rsp
	movq	%rbx, 2*8(%rsp)
	movq	%r12, 3*8(%rsp)
	movq	%r14, 4*8(%rsp)
	movq	%r13, 5*8(%rsp)
	movq	%r15, 6*8(%rsp)

	movq	%r8, (%rsp)
	movq	%r9, 1*8(%rsp)

	movl	%ecx, %eax
	movl	%edx, %ecx

	xorl	%r9d, %r9d
	movq	%rcx, %r12

	shrq	$6, %r12
	jz	.Lhandle_tail		/* < 64 */

	clc

	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */
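	/*
	 * Conceptually, each pass of the loop below adds eight 64-bit words
	 * into rax with the carries folded back in (the chained adcq
	 * instructions, plus the final "adcq %r9, %rax" after the loop).
	 * A C sketch of the same ones' complement accumulation, for
	 * illustration only:
	 *
	 *	static inline u64 add64_with_carry(u64 sum, u64 val)
	 *	{
	 *		u64 res = sum + val;
	 *		return res + (res < sum);	// end-around carry
	 *	}
	 *
	 *	for (i = 0; i < 8; i++)
	 *		sum = add64_with_carry(sum, ((const u64 *)src)[i]);
	 */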
	.p2align 4
.Lloop:
	source
	movq (%rdi), %rbx
	source
	movq 8(%rdi), %r8
	source
	movq 16(%rdi), %r11
	source
	movq 24(%rdi), %rdx

	source
	movq 32(%rdi), %r10
	source
	movq 40(%rdi), %r15
	source
	movq 48(%rdi), %r14
	source
	movq 56(%rdi), %r13

	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq %rbx, %rax
	adcq %r8, %rax
	adcq %r11, %rax
	adcq %rdx, %rax
	adcq %r10, %rax
	adcq %r15, %rax
	adcq %r14, %rax
	adcq %r13, %rax

	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

3:

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz .Lloop

	adcq %r9, %rax

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count */
	movl %ecx, %r10d
	andl $63, %ecx
	shrl $3, %ecx
	jz .Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi	/* preserve carry */
	leaq 8(%rdi), %rdi
	jnz .Lloop_8
	adcq %r9, %rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax

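	/*
	 * The four instructions above add the low and high 32-bit halves of
	 * rax and fold the carry back in via "adcl %r9d, %eax" (r9d is zero
	 * here). A C sketch, for illustration only:
	 *
	 *	static inline u32 fold_64_to_32(u64 sum)
	 *	{
	 *		u32 res = (u32)sum + (u32)(sum >> 32);
	 *		return res + (res < (u32)sum);	// end-around carry
	 *	}
	 */
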
	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
	shrl $1, %ecx
	jz .Lhandle_1
	movl $2, %edx
	xorl %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz .Lende
	xorl %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

.Lende:
	movq 2*8(%rsp), %rbx
	movq 3*8(%rsp), %r12
	movq 4*8(%rsp), %r14
	movq 5*8(%rsp), %r13
	movq 6*8(%rsp), %r15
	addq $7*8, %rsp
	ret

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq (%rsp), %rax
	testq %rax, %rax
	jz .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende

.Lbad_dest:
	movq 8(%rsp), %rax
	testq %rax, %rax
	jz .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende
ENDPROC(csum_partial_copy_generic)