]>
Commit | Line | Data |
---|---|---|
af1a8899 | 1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
1da177e4 LT |
2 | /* |
3 | * include/asm-generic/xor.h | |
4 | * | |
5 | * Generic optimized RAID-5 checksumming functions. | |
1da177e4 LT |
6 | */ |
7 | ||
268bb0ce | 8 | #include <linux/prefetch.h> |
1da177e4 LT |
9 | |
/*
 * XOR the buffer at @p2 into the buffer at @p1.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place).
 * @p2:    second operand (only read).
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always executes its body once, so without this
	 * check a length shorter than one line would touch 8 words anyway.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
28 | ||
/*
 * XOR the buffers at @p2 and @p3 into the buffer at @p1.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place).
 * @p2:    second operand (only read).
 * @p3:    third operand (only read).
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always executes its body once, so without this
	 * check a length shorter than one line would touch 8 words anyway.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
49 | ||
/*
 * XOR the buffers at @p2, @p3 and @p4 into the buffer at @p1.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place).
 * @p2:    second operand (only read).
 * @p3:    third operand (only read).
 * @p4:    fourth operand (only read).
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always executes its body once, so without this
	 * check a length shorter than one line would touch 8 words anyway.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
71 | ||
/*
 * XOR the buffers at @p2, @p3, @p4 and @p5 into the buffer at @p1.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place).
 * @p2:    second operand (only read).
 * @p3:    third operand (only read).
 * @p4:    fourth operand (only read).
 * @p5:    fifth operand (only read).
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always executes its body once, so without this
	 * check a length shorter than one line would touch 8 words anyway.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
94 | ||
/*
 * XOR the buffer at @p2 into the buffer at @p1, staging each line of
 * eight words in local temporaries: load all of @p1's line, fold in
 * @p2's line, then store the line back.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place).
 * @p2:    second operand (only read).
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always executes its body once, so without this
	 * check a length shorter than one line would touch 8 words anyway.
	 */
	if (lines <= 0)
		return;

	do {
		/*
		 * Temporaries use the same unsigned type as the data so no
		 * implementation-defined unsigned->signed conversion occurs.
		 */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
130 | ||
/*
 * XOR the buffers at @p2 and @p3 into the buffer at @p1, staging each
 * line of eight words in local temporaries: load all of @p1's line, fold
 * in one source line after another, then store the line back.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place).
 * @p2:    second operand (only read).
 * @p3:    third operand (only read).
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always executes its body once, so without this
	 * check a length shorter than one line would touch 8 words anyway.
	 */
	if (lines <= 0)
		return;

	do {
		/*
		 * Temporaries use the same unsigned type as the data so no
		 * implementation-defined unsigned->signed conversion occurs.
		 */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
176 | ||
/*
 * XOR the buffers at @p2, @p3 and @p4 into the buffer at @p1, staging
 * each line of eight words in local temporaries: load all of @p1's line,
 * fold in one source line after another, then store the line back.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place).
 * @p2:    second operand (only read).
 * @p3:    third operand (only read).
 * @p4:    fourth operand (only read).
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always executes its body once, so without this
	 * check a length shorter than one line would touch 8 words anyway.
	 */
	if (lines <= 0)
		return;

	do {
		/*
		 * Temporaries use the same unsigned type as the data so no
		 * implementation-defined unsigned->signed conversion occurs.
		 */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
231 | ||
/*
 * XOR the buffers at @p2, @p3, @p4 and @p5 into the buffer at @p1,
 * staging each line of eight words in local temporaries: load all of
 * @p1's line, fold in one source line after another, then store the
 * line back.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place).
 * @p2:    second operand (only read).
 * @p3:    third operand (only read).
 * @p4:    fourth operand (only read).
 * @p5:    fifth operand (only read).
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * The do/while below always executes its body once, so without this
	 * check a length shorter than one line would touch 8 words anyway.
	 */
	if (lines <= 0)
		return;

	do {
		/*
		 * Temporaries use the same unsigned type as the data so no
		 * implementation-defined unsigned->signed conversion occurs.
		 */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
295 | ||
/*
 * XOR the buffer at @p2 into the buffer at @p1, issuing prefetch hints
 * for the following line while the current one is processed.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place);
 *         hinted with prefetchw().
 * @p2:    second operand (only read); hinted with prefetch().
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	/* One less than the line count: the last line is done via once_more. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Nothing to do when there is not even one full line; the do/while
	 * below would otherwise still process 8 words.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/*
	 * lines hits exactly 0 once when more than one line was requested:
	 * re-enter the body past the prefetches to handle the final line.
	 * After that pass lines is negative and the function falls through.
	 */
	if (lines == 0)
		goto once_more;
}
321 | ||
/*
 * XOR the buffers at @p2 and @p3 into the buffer at @p1, issuing
 * prefetch hints for the following line while the current one is
 * processed.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place);
 *         hinted with prefetchw().
 * @p2:    second operand (only read); hinted with prefetch().
 * @p3:    third operand (only read); hinted with prefetch().
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3)
{
	/* One less than the line count: the last line is done via once_more. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Nothing to do when there is not even one full line; the do/while
	 * below would otherwise still process 8 words.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/*
	 * lines hits exactly 0 once when more than one line was requested:
	 * re-enter the body past the prefetches to handle the final line.
	 * After that pass lines is negative and the function falls through.
	 */
	if (lines == 0)
		goto once_more;
}
351 | ||
/*
 * XOR the buffers at @p2, @p3 and @p4 into the buffer at @p1, issuing
 * prefetch hints for the following line while the current one is
 * processed.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place);
 *         hinted with prefetchw().
 * @p2:    second operand (only read); hinted with prefetch().
 * @p3:    third operand (only read); hinted with prefetch().
 * @p4:    fourth operand (only read); hinted with prefetch().
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4)
{
	/* One less than the line count: the last line is done via once_more. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Nothing to do when there is not even one full line; the do/while
	 * below would otherwise still process 8 words.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/*
	 * lines hits exactly 0 once when more than one line was requested:
	 * re-enter the body past the prefetches to handle the final line.
	 * After that pass lines is negative and the function falls through.
	 */
	if (lines == 0)
		goto once_more;
}
385 | ||
/*
 * XOR the buffers at @p2, @p3, @p4 and @p5 into the buffer at @p1,
 * issuing prefetch hints for the following line while the current one
 * is processed.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place);
 *         hinted with prefetchw().
 * @p2:    second operand (only read); hinted with prefetch().
 * @p3:    third operand (only read); hinted with prefetch().
 * @p4:    fourth operand (only read); hinted with prefetch().
 * @p5:    fifth operand (only read); hinted with prefetch().
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/* One less than the line count: the last line is done via once_more. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Nothing to do when there is not even one full line; the do/while
	 * below would otherwise still process 8 words.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/*
	 * lines hits exactly 0 once when more than one line was requested:
	 * re-enter the body past the prefetches to handle the final line.
	 * After that pass lines is negative and the function falls through.
	 */
	if (lines == 0)
		goto once_more;
}
422 | ||
/*
 * XOR the buffer at @p2 into the buffer at @p1, staging each line of
 * eight words in local temporaries and issuing prefetch hints for the
 * following line while the current one is processed.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place);
 *         hinted with prefetchw().
 * @p2:    second operand (only read); hinted with prefetch().
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	/* One less than the line count: the last line is done via once_more. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Nothing to do when there is not even one full line; the do/while
	 * below would otherwise still process 8 words.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	do {
		/*
		 * Same unsigned type as the data, so no implementation-defined
		 * unsigned->signed conversion.  Every temporary is assigned
		 * after once_more before it is read, so jumping in is safe.
		 */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/*
	 * lines hits exactly 0 once when more than one line was requested:
	 * re-enter the body past the prefetches to handle the final line.
	 * After that pass lines is negative and the function falls through.
	 */
	if (lines == 0)
		goto once_more;
}
467 | ||
/*
 * XOR the buffers at @p2 and @p3 into the buffer at @p1, staging each
 * line of eight words in local temporaries and issuing prefetch hints
 * for the following line while the current one is processed.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place);
 *         hinted with prefetchw().
 * @p2:    second operand (only read); hinted with prefetch().
 * @p3:    third operand (only read); hinted with prefetch().
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3)
{
	/* One less than the line count: the last line is done via once_more. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Nothing to do when there is not even one full line; the do/while
	 * below would otherwise still process 8 words.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		/*
		 * Same unsigned type as the data, so no implementation-defined
		 * unsigned->signed conversion.  Every temporary is assigned
		 * after once_more before it is read, so jumping in is safe.
		 */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/*
	 * lines hits exactly 0 once when more than one line was requested:
	 * re-enter the body past the prefetches to handle the final line.
	 * After that pass lines is negative and the function falls through.
	 */
	if (lines == 0)
		goto once_more;
}
524 | ||
/*
 * XOR the buffers at @p2, @p3 and @p4 into the buffer at @p1, staging
 * each line of eight words in local temporaries and issuing prefetch
 * hints for the following line while the current one is processed.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place);
 *         hinted with prefetchw().
 * @p2:    second operand (only read); hinted with prefetch().
 * @p3:    third operand (only read); hinted with prefetch().
 * @p4:    fourth operand (only read); hinted with prefetch().
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4)
{
	/* One less than the line count: the last line is done via once_more. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Nothing to do when there is not even one full line; the do/while
	 * below would otherwise still process 8 words.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		/*
		 * Same unsigned type as the data, so no implementation-defined
		 * unsigned->signed conversion.  Every temporary is assigned
		 * after once_more before it is read, so jumping in is safe.
		 */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/*
	 * lines hits exactly 0 once when more than one line was requested:
	 * re-enter the body past the prefetches to handle the final line.
	 * After that pass lines is negative and the function falls through.
	 */
	if (lines == 0)
		goto once_more;
}
592 | ||
/*
 * XOR the buffers at @p2, @p3, @p4 and @p5 into the buffer at @p1,
 * staging each line of eight words in local temporaries and issuing
 * prefetch hints for the following line while the current one is
 * processed.
 *
 * @bytes: length of each buffer in bytes.  Work is done in "lines" of
 *         8 * sizeof(long) bytes; a trailing partial line is ignored.
 * @p1:    destination, which is also the first operand (updated in place);
 *         hinted with prefetchw().
 * @p2:    second operand (only read); hinted with prefetch().
 * @p3:    third operand (only read); hinted with prefetch().
 * @p4:    fourth operand (only read); hinted with prefetch().
 * @p5:    fifth operand (only read); hinted with prefetch().
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/* One less than the line count: the last line is done via once_more. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/*
	 * Nothing to do when there is not even one full line; the do/while
	 * below would otherwise still process 8 words.
	 */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		/*
		 * Same unsigned type as the data, so no implementation-defined
		 * unsigned->signed conversion.  Every temporary is assigned
		 * after once_more before it is read, so jumping in is safe.
		 */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/*
	 * lines hits exactly 0 once when more than one line was requested:
	 * re-enter the body past the prefetches to handle the final line.
	 * After that pass lines is negative and the function falls through.
	 */
	if (lines == 0)
		goto once_more;
}
671 | ||
672 | static struct xor_block_template xor_block_8regs = { | |
673 | .name = "8regs", | |
674 | .do_2 = xor_8regs_2, | |
675 | .do_3 = xor_8regs_3, | |
676 | .do_4 = xor_8regs_4, | |
677 | .do_5 = xor_8regs_5, | |
678 | }; | |
679 | ||
680 | static struct xor_block_template xor_block_32regs = { | |
681 | .name = "32regs", | |
682 | .do_2 = xor_32regs_2, | |
683 | .do_3 = xor_32regs_3, | |
684 | .do_4 = xor_32regs_4, | |
685 | .do_5 = xor_32regs_5, | |
686 | }; | |
687 | ||
720fb197 | 688 | static struct xor_block_template xor_block_8regs_p __maybe_unused = { |
1da177e4 LT |
689 | .name = "8regs_prefetch", |
690 | .do_2 = xor_8regs_p_2, | |
691 | .do_3 = xor_8regs_p_3, | |
692 | .do_4 = xor_8regs_p_4, | |
693 | .do_5 = xor_8regs_p_5, | |
694 | }; | |
695 | ||
720fb197 | 696 | static struct xor_block_template xor_block_32regs_p __maybe_unused = { |
1da177e4 LT |
697 | .name = "32regs_prefetch", |
698 | .do_2 = xor_32regs_p_2, | |
699 | .do_3 = xor_32regs_p_3, | |
700 | .do_4 = xor_32regs_p_4, | |
701 | .do_5 = xor_32regs_p_5, | |
702 | }; | |
703 | ||
/*
 * Hand each of the four generic templates to xor_speed(), in this order.
 * xor_speed() is not defined here; presumably it benchmarks the template
 * it is given -- confirm against the includer.
 */
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	} while (0)