]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/lib/csumpartialcopygeneric.S | |
3 | * | |
4 | * Copyright (C) 1995-2001 Russell King | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
6ebbf2ce | 10 | #include <asm/assembler.h> |
1da177e4 LT |
11 | |
12 | /* | |
13 | * unsigned int | |
14 | * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ...) | |
15 | * r0 = src, r1 = dst, r2 = len, r3 = sum | |
16 | * Returns : r0 = checksum | |
17 | * | |
18 | * Note that 'tst' and 'teq' preserve the carry flag. | |
19 | */ | |
20 | ||
21 | src .req r0 | |
22 | dst .req r1 | |
23 | len .req r2 | |
24 | sum .req r3 | |
25 | ||
/*
 * Zero-length exit, reached from .Lless8 when len is 0: the incoming
 * sum is returned unchanged in r0.
 * NOTE(review): load_regs is a macro defined outside this file. It is
 * presumably the epilogue that undoes save_regs and returns to the
 * caller, since nothing here is meant to fall through into the code
 * below. Confirm in the including file.
 */
8adbb371 | 26 | .Lzero: mov r0, sum |
90303b10 | 27 | load_regs
1da177e4 LT |
28 | |
29 | /* | |
30 | * Align an unaligned destination pointer. We know that | |
31 | * we have >= 8 bytes here, so we don't need to check | |
32 | * the length. Note that the source pointer hasn't been | |
33 | * aligned yet. | |
34 | */ | |
/*
 * Subroutine: entered with a conditional bl from the main entry path
 * when the low two bits of dst are not both zero, and left through lr.
 * It copies 1, 2 or 3 bytes so that dst ends up word aligned:
 *   - dst odd:  one byte is copied, then, if bit 1 of dst is still set,
 *               two more bytes are copied by .Ldst_16bit.
 *   - dst even: .Ldst_16bit copies two bytes.
 * len is decremented by the number of bytes copied and each byte is
 * added to sum with adcs, so the running carry is left in the C flag
 * for the caller.  sub and tst are used in their non flag-setting or
 * carry-preserving forms for that reason.
 * NOTE(review): load1b, load2b, put_byte_0, put_byte_1, ret and reteq
 * are defined outside this file. The loads presumably read from src
 * with post-increment and put_byte_N presumably shifts a byte into
 * byte lane N of the 16-bit sum. Confirm in the including file.
 */
8adbb371 NP |
35 | .Ldst_unaligned: |
36 | tst dst, #1 | |
37 | beq .Ldst_16bit | |
1da177e4 LT |
38 | |
/* dst is odd: move a single byte first. */
39 | load1b ip | |
40 | sub len, len, #1 | |
41 | adcs sum, sum, ip, put_byte_1 @ update checksum | |
42 | strb ip, [dst], #1 | |
43 | tst dst, #2 | |
6ebbf2ce | 44 | reteq lr @ dst is now 32bit aligned |
1da177e4 | 45 | |
/* dst is halfword but not word aligned: move two bytes. */
8adbb371 | 46 | .Ldst_16bit: load2b r8, ip |
1da177e4 LT |
47 | sub len, len, #2 |
48 | adcs sum, sum, r8, put_byte_0 | |
49 | strb r8, [dst], #1 | |
50 | adcs sum, sum, ip, put_byte_1 | |
51 | strb ip, [dst], #1 | |
6ebbf2ce | 52 | ret lr @ dst is now 32bit aligned |
1da177e4 LT |
53 | |
54 | /* | |
55 | * Handle 0 to 7 bytes, with any alignment of source and | |
56 | * destination pointers. Note that when we get here, C = 0 | |
57 | */ | |
/*
 * Reached from the entry path through blo after cmp len, 8, which is
 * why the carry flag is known to be clear on arrival.  The copy is done
 * a byte or a byte pair at a time:
 *   1. len of 0 leaves through .Lzero.
 *   2. If dst is odd, one byte is copied so that dst becomes even.
 *   3. While bits 1 or 2 of len are set (at least two bytes remain,
 *      since len is below 8), byte pairs are copied by the 1: loop.
 *   4. A final single byte is copied if bit 0 of len is set.
 * Every path ends at .Ldone, which folds the last carry and applies the
 * odd-dst rotation.
 * NOTE(review): load1b, load2b and put_byte_N are defined outside this
 * file. See the including file for their exact expansion.
 */
8adbb371 NP |
58 | .Lless8: teq len, #0 @ check for zero count |
59 | beq .Lzero | |
1da177e4 LT |
60 | |
61 | /* we must have at least one byte. */ | |
62 | tst dst, #1 @ dst 16-bit aligned | |
8adbb371 | 63 | beq .Lless8_aligned |
1da177e4 LT |
64 | |
65 | /* Align dst */ | |
66 | load1b ip | |
67 | sub len, len, #1 | |
68 | adcs sum, sum, ip, put_byte_1 @ update checksum | |
69 | strb ip, [dst], #1 | |
70 | tst len, #6 | |
8adbb371 | 71 | beq .Lless8_byteonly |
1da177e4 LT |
72 | |
/* Two bytes per pass. sub and tst leave the carry from adcs intact. */
73 | 1: load2b r8, ip | |
74 | sub len, len, #2 | |
75 | adcs sum, sum, r8, put_byte_0 | |
76 | strb r8, [dst], #1 | |
77 | adcs sum, sum, ip, put_byte_1 | |
78 | strb ip, [dst], #1 | |
8adbb371 NP |
79 | .Lless8_aligned: |
80 | tst len, #6 | |
1da177e4 | 81 | bne 1b |
/* At most one byte is left at this point. */
8adbb371 | 82 | .Lless8_byteonly: |
1da177e4 | 83 | tst len, #1 |
8adbb371 | 84 | beq .Ldone |
1da177e4 LT |
85 | load1b r8 |
86 | adcs sum, sum, r8, put_byte_0 @ update checksum | |
87 | strb r8, [dst], #1 | |
8adbb371 | 88 | b .Ldone |
1da177e4 LT |
89 | |
/*
 * Entry point of the copy-and-checksum routine.
 *   In:  src = r0, dst = r1, len = r2, sum = r3
 *   Out: r0 = checksum
 * Lengths below 8 are handed to .Lless8.  Otherwise dst is brought to a
 * word boundary by .Ldst_unaligned, and this block then handles the
 * case where src is word aligned too.  An unaligned src is handed to
 * .Lsrc_not_aligned.  Each copied word is added to sum with adcs, so
 * the carry travels in the C flag from one add to the next and is only
 * folded in at .Ldone.
 * NOTE(review): FN_ENTRY, save_regs, load_regs and the loadN macros are
 * defined outside this file, presumably so that the same body can be
 * assembled under more than one function name. The loadN macros
 * presumably read N words from src with post-increment. Confirm in the
 * including file.
 */
90 | FN_ENTRY | |
1da177e4 | 91 | save_regs |
1da177e4 LT |
92 | |
93 | cmp len, #8 @ Ensure that we have at least | |
8adbb371 | 94 | blo .Lless8 @ 8 bytes to copy. |
1da177e4 LT |
95 | |
96 | adds sum, sum, #0 @ C = 0 | |
97 | tst dst, #3 @ Test destination alignment | |
8adbb371 | 98 | blne .Ldst_unaligned @ align destination, return here |
1da177e4 LT |
99 | |
100 | /* | |
101 | * Ok, the dst pointer is now 32bit aligned, and we know | |
102 | * that we must have more than 4 bytes to copy. Note | |
103 | * that C contains the carry from the dst alignment above. | |
104 | */ | |
105 | ||
106 | tst src, #3 @ Test source alignment | |
8adbb371 | 107 | bne .Lsrc_not_aligned |
1da177e4 LT |
108 | |
109 | /* Routine for src & dst aligned */ | |
110 | ||
/* ip = len with its low four bits cleared. Zero means skip the loop. */
111 | bics ip, len, #15 | |
112 | beq 2f | |
113 | ||
/*
 * 16 bytes per pass.  The counter is stepped with sub and tested with
 * teq rather than subs, so the carry from the last adcs is still in C
 * at the top of the next pass.
 */
114 | 1: load4l r4, r5, r6, r7 | |
115 | stmia dst!, {r4, r5, r6, r7} | |
116 | adcs sum, sum, r4 | |
117 | adcs sum, sum, r5 | |
118 | adcs sum, sum, r6 | |
119 | adcs sum, sum, r7 | |
120 | sub ip, ip, #16 | |
121 | teq ip, #0 | |
122 | bne 1b | |
123 | ||
/* Bits 3 and 2 of len: one optional 8-byte and one optional 4-byte copy. */
124 | 2: ands ip, len, #12 | |
125 | beq 4f | |
126 | tst ip, #8 | |
127 | beq 3f | |
128 | load2l r4, r5 | |
129 | stmia dst!, {r4, r5} | |
130 | adcs sum, sum, r4 | |
131 | adcs sum, sum, r5 | |
132 | tst ip, #4 | |
133 | beq 4f | |
134 | ||
135 | 3: load1l r4 | |
136 | str r4, [dst], #4 | |
137 | adcs sum, sum, r4 | |
138 | ||
/*
 * 1 to 3 trailing bytes (len is reduced to its low two bits).  One more
 * word is fetched into r4 and the bytes are picked out of it into r5.
 * When bit 1 of len is set, two bytes are added to sum in a single adcs
 * and stored one at a time, leaving the third byte pending in r5.
 * NOTE(review): get_byte_N and lspush are defined outside this file and
 * presumably depend on endianness. Confirm in asm/assembler.h.
 */
139 | 4: ands len, len, #3 | |
8adbb371 | 140 | beq .Ldone |
1da177e4 LT |
141 | load1l r4 |
142 | tst len, #2 | |
143 | mov r5, r4, get_byte_0 | |
8adbb371 | 144 | beq .Lexit |
d98b90ea | 145 | adcs sum, sum, r4, lspush #16 |
1da177e4 LT |
146 | strb r5, [dst], #1 |
147 | mov r5, r4, get_byte_1 | |
148 | strb r5, [dst], #1 | |
149 | mov r5, r4, get_byte_2 | |
/*
 * Shared tail, also used by the unaligned-source paths.  r5 holds the
 * pending byte.  If bit 0 of len is set it is stored, masked to 8 bits
 * and added to sum.  Otherwise all three conditional instructions are
 * skipped and the flags from the previous adcs are left alone.
 */
8adbb371 | 150 | .Lexit: tst len, #1 |
1da177e4 LT |
151 | strneb r5, [dst], #1 |
152 | andne r5, r5, #255 | |
153 | adcnes sum, sum, r5, put_byte_0 | |
154 | ||
155 | /* | |
156 | * If the dst pointer was not 16-bit aligned, we | |
157 | * need to rotate the checksum here to get around | |
158 | * the inefficient byte manipulations in the | |
159 | * architecture independent code. | |
160 | */ | |
/*
 * Common exit.  The outstanding carry is folded into the result in r0.
 * sum is then reused as a scratch register to reload the word at the
 * top of the stack, which is presumably the original dst as stacked by
 * save_regs (TODO confirm against the save_regs definition).  If that
 * address was odd the result is rotated right by 8 bits.
 */
8adbb371 | 161 | .Ldone: adc r0, sum, #0 |
1da177e4 LT |
162 | ldr sum, [sp, #0] @ dst |
163 | tst sum, #1 | |
164 | movne r0, r0, ror #8 | |
90303b10 | 165 | load_regs
1da177e4 | 166 | |
/*
 * Source not word aligned (dst already is).  The carry left over from
 * the dst alignment is folded into sum, ip is set to the byte offset of
 * src within its word (1, 2 or 3, since this path is only taken when
 * the low two bits of src are not both zero), src is rounded down to a
 * word boundary and the first aligned word is fetched into r5.
 * Offset 2 and offset 3 branch away.  Offset 1 falls through.
 *
 * Offset 1 path: r4 always carries the part of the previous source word
 * that has not been written yet (shifted by 8).  Each output word is r4
 * merged with the leading part of the next source word (shifted by 24).
 * The cmp leaves C clear when ip is 1, so no explicit carry clear is
 * needed here, unlike the other two paths which use adds.
 * NOTE(review): lspull and lspush are shift operators defined outside
 * this file, presumably endian-dependent aliases of the logical
 * shifts. Confirm in asm/assembler.h.
 */
8adbb371 | 167 | .Lsrc_not_aligned:
1da177e4 LT |
168 | adc sum, sum, #0 @ include C from dst alignment |
169 | and ip, src, #3 | |
170 | bic src, src, #3 | |
171 | load1l r5 | |
172 | cmp ip, #2 | |
8adbb371 NP |
173 | beq .Lsrc2_aligned |
174 | bhi .Lsrc3_aligned | |
d98b90ea | 175 | mov r4, r5, lspull #8 @ C = 0 |
1da177e4 LT |
176 | bics ip, len, #15 |
177 | beq 2f | |
/* 16 bytes per pass. r8 supplies the carry-over for the next pass. */
178 | 1: load4l r5, r6, r7, r8 | |
d98b90ea VK |
179 | orr r4, r4, r5, lspush #24 |
180 | mov r5, r5, lspull #8 | |
181 | orr r5, r5, r6, lspush #24 | |
182 | mov r6, r6, lspull #8 | |
183 | orr r6, r6, r7, lspush #24 | |
184 | mov r7, r7, lspull #8 | |
185 | orr r7, r7, r8, lspush #24 | |
1da177e4 LT |
186 | stmia dst!, {r4, r5, r6, r7} |
187 | adcs sum, sum, r4 | |
188 | adcs sum, sum, r5 | |
189 | adcs sum, sum, r6 | |
190 | adcs sum, sum, r7 | |
d98b90ea | 191 | mov r4, r8, lspull #8 |
1da177e4 LT |
192 | sub ip, ip, #16 |
193 | teq ip, #0 | |
194 | bne 1b | |
/* Bits 3 and 2 of len: one optional 8-byte and one optional 4-byte copy. */
195 | 2: ands ip, len, #12 | |
196 | beq 4f | |
197 | tst ip, #8 | |
198 | beq 3f | |
199 | load2l r5, r6 | |
d98b90ea VK |
200 | orr r4, r4, r5, lspush #24 |
201 | mov r5, r5, lspull #8 | |
202 | orr r5, r5, r6, lspush #24 | |
1da177e4 LT |
203 | stmia dst!, {r4, r5} |
204 | adcs sum, sum, r4 | |
205 | adcs sum, sum, r5 | |
d98b90ea | 206 | mov r4, r6, lspull #8 |
1da177e4 LT |
207 | tst ip, #4 |
208 | beq 4f | |
209 | 3: load1l r5 | |
d98b90ea | 210 | orr r4, r4, r5, lspush #24 |
1da177e4 LT |
211 | str r4, [dst], #4 |
212 | adcs sum, sum, r4 | |
d98b90ea | 213 | mov r4, r5, lspull #8 |
/*
 * 1 to 3 trailing bytes.  No further load is issued on this path: all
 * of the bytes are taken from the carry-over already held in r4.  The
 * last pending byte is left in r5 for the shared .Lexit tail.
 */
1da177e4 | 214 | 4: ands len, len, #3 |
8adbb371 | 215 | beq .Ldone |
1da177e4 LT |
216 | mov r5, r4, get_byte_0 |
217 | tst len, #2 | |
8adbb371 | 218 | beq .Lexit |
d98b90ea | 219 | adcs sum, sum, r4, lspush #16 |
1da177e4 LT |
220 | strb r5, [dst], #1 |
221 | mov r5, r4, get_byte_1 | |
222 | strb r5, [dst], #1 | |
223 | mov r5, r4, get_byte_2 | |
8adbb371 | 224 | b .Lexit |
1da177e4 | 225 | |
/*
 * Source offset 2 path, entered from .Lsrc_not_aligned with the first
 * aligned source word in r5.  r4 carries the unwritten half of the
 * previous source word (shifted by 16) and each output word is r4
 * merged with the leading half of the next source word (also shifted
 * by 16).  The adds of zero clears the carry flag before the adcs
 * chain starts, because the cmp that selected this path left it set.
 * NOTE(review): lspull, lspush and get_byte_N are defined outside this
 * file. Confirm their expansion in asm/assembler.h.
 */
d98b90ea | 226 | .Lsrc2_aligned: mov r4, r5, lspull #16
1da177e4 LT |
227 | adds sum, sum, #0 |
228 | bics ip, len, #15 | |
229 | beq 2f | |
/* 16 bytes per pass. r8 supplies the carry-over for the next pass. */
230 | 1: load4l r5, r6, r7, r8 | |
d98b90ea VK |
231 | orr r4, r4, r5, lspush #16 |
232 | mov r5, r5, lspull #16 | |
233 | orr r5, r5, r6, lspush #16 | |
234 | mov r6, r6, lspull #16 | |
235 | orr r6, r6, r7, lspush #16 | |
236 | mov r7, r7, lspull #16 | |
237 | orr r7, r7, r8, lspush #16 | |
1da177e4 LT |
238 | stmia dst!, {r4, r5, r6, r7} |
239 | adcs sum, sum, r4 | |
240 | adcs sum, sum, r5 | |
241 | adcs sum, sum, r6 | |
242 | adcs sum, sum, r7 | |
d98b90ea | 243 | mov r4, r8, lspull #16 |
1da177e4 LT |
244 | sub ip, ip, #16 |
245 | teq ip, #0 | |
246 | bne 1b | |
/* Bits 3 and 2 of len: one optional 8-byte and one optional 4-byte copy. */
247 | 2: ands ip, len, #12 | |
248 | beq 4f | |
249 | tst ip, #8 | |
250 | beq 3f | |
251 | load2l r5, r6 | |
d98b90ea VK |
252 | orr r4, r4, r5, lspush #16 |
253 | mov r5, r5, lspull #16 | |
254 | orr r5, r5, r6, lspush #16 | |
1da177e4 LT |
255 | stmia dst!, {r4, r5} |
256 | adcs sum, sum, r4 | |
257 | adcs sum, sum, r5 | |
d98b90ea | 258 | mov r4, r6, lspull #16 |
1da177e4 LT |
259 | tst ip, #4 |
260 | beq 4f | |
261 | 3: load1l r5 | |
d98b90ea | 262 | orr r4, r4, r5, lspush #16 |
1da177e4 LT |
263 | str r4, [dst], #4 |
264 | adcs sum, sum, r4 | |
d98b90ea | 265 | mov r4, r5, lspull #16 |
/*
 * 1 to 3 trailing bytes.  r4 holds the two source bytes carried over
 * from the last word.  With one byte left, it goes to .Lexit in r5.
 * With two or three, both carried bytes are summed in a single adcs and
 * stored.  A third byte is not available in r4, so it is fetched
 * with load1b before joining the shared .Lexit tail.
 */
1da177e4 | 266 | 4: ands len, len, #3 |
8adbb371 | 267 | beq .Ldone |
1da177e4 LT |
268 | mov r5, r4, get_byte_0 |
269 | tst len, #2 | |
8adbb371 | 270 | beq .Lexit |
1da177e4 LT |
271 | adcs sum, sum, r4 |
272 | strb r5, [dst], #1 | |
273 | mov r5, r4, get_byte_1 | |
274 | strb r5, [dst], #1 | |
275 | tst len, #1 | |
8adbb371 | 276 | beq .Ldone |
1da177e4 | 277 | load1b r5 |
8adbb371 | 278 | b .Lexit |
1da177e4 | 279 | |
/*
 * Source offset 3 path, entered from .Lsrc_not_aligned with the first
 * aligned source word in r5.  r4 carries the unwritten part of the
 * previous source word (shifted by 24) and each output word is r4
 * merged with the leading part of the next source word (shifted by 8).
 * The adds of zero clears the carry flag before the adcs chain starts,
 * because the cmp that selected this path left it set.
 * NOTE(review): lspull, lspush, get_byte_N and FN_EXIT are defined
 * outside this file. Confirm their expansion in asm/assembler.h and in
 * the including file.
 */
d98b90ea | 280 | .Lsrc3_aligned: mov r4, r5, lspull #24
1da177e4 LT |
281 | adds sum, sum, #0 |
282 | bics ip, len, #15 | |
283 | beq 2f | |
/* 16 bytes per pass. r8 supplies the carry-over for the next pass. */
284 | 1: load4l r5, r6, r7, r8 | |
d98b90ea VK |
285 | orr r4, r4, r5, lspush #8 |
286 | mov r5, r5, lspull #24 | |
287 | orr r5, r5, r6, lspush #8 | |
288 | mov r6, r6, lspull #24 | |
289 | orr r6, r6, r7, lspush #8 | |
290 | mov r7, r7, lspull #24 | |
291 | orr r7, r7, r8, lspush #8 | |
1da177e4 LT |
292 | stmia dst!, {r4, r5, r6, r7} |
293 | adcs sum, sum, r4 | |
294 | adcs sum, sum, r5 | |
295 | adcs sum, sum, r6 | |
296 | adcs sum, sum, r7 | |
d98b90ea | 297 | mov r4, r8, lspull #24 |
1da177e4 LT |
298 | sub ip, ip, #16 |
299 | teq ip, #0 | |
300 | bne 1b | |
/* Bits 3 and 2 of len: one optional 8-byte and one optional 4-byte copy. */
301 | 2: ands ip, len, #12 | |
302 | beq 4f | |
303 | tst ip, #8 | |
304 | beq 3f | |
305 | load2l r5, r6 | |
d98b90ea VK |
306 | orr r4, r4, r5, lspush #8 |
307 | mov r5, r5, lspull #24 | |
308 | orr r5, r5, r6, lspush #8 | |
1da177e4 LT |
309 | stmia dst!, {r4, r5} |
310 | adcs sum, sum, r4 | |
311 | adcs sum, sum, r5 | |
d98b90ea | 312 | mov r4, r6, lspull #24 |
1da177e4 LT |
313 | tst ip, #4 |
314 | beq 4f | |
315 | 3: load1l r5 | |
d98b90ea | 316 | orr r4, r4, r5, lspush #8 |
1da177e4 LT |
317 | str r4, [dst], #4 |
318 | adcs sum, sum, r4 | |
d98b90ea | 319 | mov r4, r5, lspull #24 |
/*
 * 1 to 3 trailing bytes.  r4 holds the single source byte carried over
 * from the last word.  With one byte left, it goes to .Lexit in r5.
 * With two or three, that byte is stored and summed, the next source
 * word is fetched into r4, its first byte is stored and summed, and
 * its second byte is left pending in r5 for the shared .Lexit tail.
 */
1da177e4 | 320 | 4: ands len, len, #3 |
8adbb371 | 321 | beq .Ldone |
1da177e4 LT |
322 | mov r5, r4, get_byte_0 |
323 | tst len, #2 | |
8adbb371 | 324 | beq .Lexit |
1da177e4 LT |
325 | strb r5, [dst], #1 |
326 | adcs sum, sum, r4 | |
327 | load1l r4 | |
328 | mov r5, r4, get_byte_0 | |
329 | strb r5, [dst], #1 | |
d98b90ea | 330 | adcs sum, sum, r4, lspush #24 |
1da177e4 | 331 | mov r5, r4, get_byte_1 |
8adbb371 | 332 | b .Lexit |
93ed3970 | 333 | FN_EXIT