/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/

.text
	.align 4
.globl csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
#-----------------------------------------------------------------------
# unsigned int csum_partial(const unsigned char *buff, int len,
#                           unsigned int sum)
# ABI:    i386 cdecl — args on the stack, result in %eax.
# In:     12(%esp) = buff, 16(%esp) = len, 20(%esp) = sum
# Out:    %eax = 32-bit ones'-complement partial checksum accumulator
# Clobb:  %ecx, %edx, flags (%esi/%ebx saved and restored)
#
# Generic 486/Pentium version: folds a leading 2-byte-misaligned word,
# then runs a 32-byte unrolled adcl loop, then the 4-byte remainder
# loop, then the final 0-3 trailing bytes.
#-----------------------------------------------------------------------
csum_partial:
	pushl %esi				# callee-saved, holds buff cursor
	pushl %ebx				# callee-saved, scratch for loads
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $2, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2. Deal with it.
	jmp 4f
1:	movw (%esi), %bx	# fold the leading word to reach 4-alignment
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax		# fold carry back into the accumulator
2:
	movl %ecx, %edx		# edx = remaining len (for the tail)
	shrl $5, %ecx		# ecx = number of 32-byte chunks
	jz 2f
	testl %esi, %esi	# clears CF before the adcl chain below
1:	movl (%esi), %ebx	# 8 x (load + add-with-carry), carry is
	adcl %ebx, %eax		# threaded through the whole chunk
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi	# lea/dec preserve CF across iterations
	dec %ecx
	jne 1b
	adcl $0, %eax		# fold the final carry
2:	movl %edx, %ecx		# ecx = remaining len again
	andl $0x1c, %edx	# edx = remaining whole dwords (bytes)
	je 4f
	shrl $2, %edx		# dword count; also clears CF
3:	adcl (%esi), %eax	# one dword per iteration, carry threaded
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx		# 0-3 trailing bytes
	jz 7f
	cmpl $2, %ecx
	jb 5f			# exactly 1 byte left
	movw (%esi),%cx		# at least 2: pick up a word
	leal 2(%esi),%esi
	je 6f			# exactly 2 bytes: add it as-is
	shll $16,%ecx		# 3 bytes: word goes in the high half...
5:	movb (%esi),%cl		# ...and the last byte in the low half
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	popl %ebx
	popl %esi
	ret
#else

/* Version for PentiumII/PPro */

#-----------------------------------------------------------------------
# unsigned int csum_partial(const unsigned char *buf, int len,
#                           unsigned int sum)
# ABI:    i386 cdecl — args on the stack, result in %eax.
# In:     12(%esp) = buf, 16(%esp) = len, 20(%esp) = sum
# Out:    %eax = 32-bit ones'-complement partial checksum accumulator
# Clobb:  %ecx, %edx, flags (%esi/%ebx saved and restored)
#
# PentiumII/PPro version: processes 128 bytes per iteration and uses a
# computed jump INTO the 32-wide adcl chain (Duff's-device style) to
# handle the 4..124-byte remainder without a separate loop.  Each adcl
# disp8(%esi) instruction is 3 bytes long, which is what the
# lea 45f(%ebx,%ebx,2) address arithmetic relies on.
#-----------------------------------------------------------------------
csum_partial:
	pushl %esi				# callee-saved, holds buf cursor
	pushl %ebx				# callee-saved, scratch
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $2, %esi		# 2-byte aligned? take the fixup path
	jnz 30f
10:
	movl %ecx, %edx		# edx = len, kept for the 1-3 byte tail
	movl %ecx, %ebx
	andl $0x7c, %ebx	# ebx = len mod 128, rounded to dwords
	shrl $7, %ecx		# ecx = number of full 128-byte chunks
	addl %ebx,%esi		# advance so the chain's -128..-4 offsets land
	shrl $2, %ebx		# ebx = remainder dword count
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx	# 45f minus 3 bytes per remaining dword
	testl %esi, %esi	# clears CF before jumping into the adcl chain
	jmp *%ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax	# fold the leading word, then realign
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b

30:	subl $2, %ecx
	ja 20b			# more than 2 bytes: fold word and retry
	je 32f			# exactly 2 bytes
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax	# 32-dword chain; entry point for full chunks
	adcl -124(%esi), %eax	# (the computed jump above lands part-way in
	adcl -120(%esi), %eax	#  for the remainder)
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi	# lea/dec preserve CF across iterations
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx		# 0-3 trailing bytes
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	popl %ebx
	popl %esi
	ret
#endif
EXPORT_SYMBOL(csum_partial)