/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *		Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
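/*
 * For reference, a minimal C sketch of the accumulation this routine
 * performs (hedged: it follows the usual RFC 1071 end-around-carry idea,
 * defers the carry folding to the end instead of using add-with-carry
 * word by word, and ignores the odd-address byte swap done at the end;
 * csum_partial_ref and the use of memcpy are illustrative only, not
 * kernel API):
 *
 *	#include <string.h>
 *
 *	static unsigned int csum_partial_ref(const unsigned char *buff,
 *					     int len, unsigned int sum)
 *	{
 *		unsigned int acc = sum;
 *		unsigned short w;
 *
 *		while (len >= 2) {
 *			memcpy(&w, buff, 2);
 *			acc += w;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)
 *			acc += buff[0];
 *
 *		acc = (acc & 0xffff) + (acc >> 16);
 *		acc = (acc & 0xffff) + (acc >> 16);
 *		return acc;
 *	}
 *
 * The two folding lines at the bottom correspond to the code at label 7
 * below.  The trailing byte is added into the low lane, which matches the
 * little-endian path; the big-endian path shifts it left by 8 first.
 */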
#ifdef CONFIG_ISA_DUAL_ISSUE

/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary.  We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */
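/*
 * In C terms, the fixup below first consumes one byte if buff starts on
 * an odd address (giving 2-byte alignment) and then one halfword if it
 * is still not 4-byte aligned, before entering the unrolled word loop.
 * A hedged sketch of just that head fixup, reusing buff/len/acc from the
 * sketch above (the real code additionally records an odd start address
 * in r7 so the folded 16-bit result can be byte-swapped at the end, and
 * it branches straight to the tail handling when fewer than two bytes
 * remain):
 *
 *	if ((unsigned long)buff & 1) {
 *		acc += *buff++;
 *		len--;
 *	}
 *	if (((unsigned long)buff & 2) && len >= 2) {
 *		acc += *(const unsigned short *)buff;
 *		buff += 2;
 *		len -= 2;
 *	}
 */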
	.text
ENTRY(csum_partial)
	; Function args
	; r0: unsigned char *buff
	; r1: int len
	; r2: unsigned int sum

	push	r2		    ||	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0		    ||	addi	r0, #1
	; clear c-bit || Alignment uses up bytes.
	cmp	r0, r0		    ||	addi	r1, #-1
	ldi	r3, #0		    ||	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	; clear c-bit || Alignment uses up two bytes.
	cmp	r0, r0		    ||	addi	r1, #-2
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	bra	4f		    ||	addi	r1, #2
	.fillinsn			; len(r1) was < 2.  Deal with it.
1:
	; 2-byte aligned
	lduh	r4, @r0		    ||	ldi	r3, #0
	addx	r2, r4		    ||	addi	r0, #2
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	ld	r3, @r0+	    ||	addx	r2, r3	; +12
	ld	r4, @r0+	    ||	addx	r2, r4	; +16
	ld	r5, @r0+	    ||	addx	r2, r5	; +20
	ld	r3, @r0+	    ||	addx	r2, r3	; +24
	ld	r4, @r0+	    ||	addx	r2, r4	; +28
	addx	r2, r5		    ||	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; remaining len, word-sized part
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+	    ||	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)
	lduh	r4, @r0		    ||	addi	r0, #2
	addi	r1, #-2		    ||	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0		    ||	ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1
	.fillinsn
7:
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2		    ||	cmp	r0, r0
	addx	r0, r2		    ||	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#else /* not CONFIG_ISA_DUAL_ISSUE */
/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary.  We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */
	.text
ENTRY(csum_partial)
	; Function args
	; r0: unsigned char *buff
	; r1: int len
	; r2: unsigned int sum

	push	r2
	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0
	addi	r0, #1
	addi	r1, #-1			; Alignment uses up bytes.
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	addi	r1, #-2			; Alignment uses up two bytes.
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	addi	r1, #2			; len(r1) was < 2.  Deal with it.
	bra	4f
	.fillinsn
1:
	; 2-byte aligned
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

2:	and3	r6, r1, #0x1c		; remaining len, word-sized part
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)

	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5
	.fillinsn
7:
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2
	cmp	r0, r0
	addx	r0, r2
	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				int len, int sum, int *src_err_ptr, int *dst_err_ptr)
*/
/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so that a custom exception handler can be called for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
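/*
 * For reference, the intended semantics of this interface are roughly
 * the following (a hedged sketch only: it ignores the fault-handling
 * paths through *src_err_ptr / *dst_err_ptr, and csum_partial_copy_sketch
 * plus the use of memcpy are illustrative, not kernel API; note that the
 * m32r entry below is still just a stub of nops that returns immediately):
 *
 *	#include <string.h>
 *
 *	static unsigned int csum_partial_copy_sketch(const char *src, char *dst,
 *						     int len, int sum)
 *	{
 *		memcpy(dst, src, len);
 *		return csum_partial((const unsigned char *)dst, len, sum);
 *	}
 *
 * A real implementation fuses the copy and the checksum into a single
 * pass and wraps the loads/stores in exception-table entries (the SRC
 * and DST macros mentioned above), so that a fault sets *src_err_ptr or
 * *dst_err_ptr instead of crashing the kernel.
 */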
ENTRY(csum_partial_copy_generic)
	nop
	nop
	nop
	nop
	jmp	r14
	nop
	nop
	nop
	.end