]> git.proxmox.com Git - ceph.git/blame - ceph/src/isa-l/crc/crc32_ieee_01.asm
update sources to v12.1.1
[ceph.git] / ceph / src / isa-l / crc / crc32_ieee_01.asm
CommitLineData
7c673cae
FG
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3;
4; Redistribution and use in source and binary forms, with or without
5; modification, are permitted provided that the following conditions
6; are met:
7; * Redistributions of source code must retain the above copyright
8; notice, this list of conditions and the following disclaimer.
9; * Redistributions in binary form must reproduce the above copyright
10; notice, this list of conditions and the following disclaimer in
11; the documentation and/or other materials provided with the
12; distribution.
13; * Neither the name of Intel Corporation nor the names of its
14; contributors may be used to endorse or promote products derived
15; from this software without specific prior written permission.
16;
17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30; Function API:
31; UINT32 crc32_ieee_01(
32; UINT32 init_crc, //initial CRC value, 32 bits
33; const unsigned char *buf, //buffer pointer to calculate CRC on
34; UINT64 len //buffer length in bytes (64-bit data)
35; );
36;
37; Authors:
38; Erdinc Ozturk
39; Vinodh Gopal
40; James Guilford
41;
42; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
43; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
44
45%include "reg_sizes.asm"
46
224ce89b 47%define fetch_dist 1024
7c673cae
FG
48[bits 64]
49default rel
50
51section .text
52
53%ifidn __OUTPUT_FORMAT__, win64
54 %xdefine arg1 rcx
55 %xdefine arg2 rdx
56 %xdefine arg3 r8
57
58 %xdefine arg1_low32 ecx
59%else
60 %xdefine arg1 rdi
61 %xdefine arg2 rsi
62 %xdefine arg3 rdx
63
64 %xdefine arg1_low32 edi
65%endif
66
67%define TMP 16*0
68%ifidn __OUTPUT_FORMAT__, win64
69 %define XMM_SAVE 16*2
70 %define VARIABLE_OFFSET 16*10+8
71%else
72 %define VARIABLE_OFFSET 16*2+8
73%endif
74align 16
75global crc32_ieee_01:function
76crc32_ieee_01:
77
78 not arg1_low32 ;~init_crc
79
80 sub rsp,VARIABLE_OFFSET
81
82%ifidn __OUTPUT_FORMAT__, win64
83 ; push the xmm registers into the stack to maintain
84 movdqa [rsp + XMM_SAVE + 16*0], xmm6
85 movdqa [rsp + XMM_SAVE + 16*1], xmm7
86 movdqa [rsp + XMM_SAVE + 16*2], xmm8
87 movdqa [rsp + XMM_SAVE + 16*3], xmm9
88 movdqa [rsp + XMM_SAVE + 16*4], xmm10
89 movdqa [rsp + XMM_SAVE + 16*5], xmm11
90 movdqa [rsp + XMM_SAVE + 16*6], xmm12
91 movdqa [rsp + XMM_SAVE + 16*7], xmm13
92%endif
93
94
95 ; check if smaller than 256
96 cmp arg3, 256
97
98 ; for sizes less than 256, we can't fold 128B at a time...
99 jl _less_than_256
100
101
102 ; load the initial crc value
103 movd xmm10, arg1_low32 ; initial crc
104
105 ; crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register.
106 ; because data will be byte-reflected and will align with initial crc at correct place.
107 pslldq xmm10, 12
108
109 movdqa xmm11, [SHUF_MASK]
110 ; receive the initial 128B data, xor the initial crc value
111 movdqu xmm0, [arg2+16*0]
112 movdqu xmm1, [arg2+16*1]
113 movdqu xmm2, [arg2+16*2]
114 movdqu xmm3, [arg2+16*3]
115 movdqu xmm4, [arg2+16*4]
116 movdqu xmm5, [arg2+16*5]
117 movdqu xmm6, [arg2+16*6]
118 movdqu xmm7, [arg2+16*7]
119
120 pshufb xmm0, xmm11
121 ; XOR the initial_crc value
122 pxor xmm0, xmm10
123 pshufb xmm1, xmm11
124 pshufb xmm2, xmm11
125 pshufb xmm3, xmm11
126 pshufb xmm4, xmm11
127 pshufb xmm5, xmm11
128 pshufb xmm6, xmm11
129 pshufb xmm7, xmm11
130
131 movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
132 ;imm value of pclmulqdq instruction will determine which constant to use
133 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
134 ; we subtract 256 instead of 128 to save one instruction from the loop
135 sub arg3, 256
136
137 ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop
138 ; loop will fold 128B at a time until we have 128+y Bytes of buffer
139
140
141 ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
142_fold_128_B_loop:
143
144 ; update the buffer pointer
145 add arg2, 128 ; buf += 128;
146
224ce89b 147 prefetchnta [arg2+fetch_dist+0]
7c673cae
FG
148 movdqu xmm9, [arg2+16*0]
149 movdqu xmm12, [arg2+16*1]
150 pshufb xmm9, xmm11
151 pshufb xmm12, xmm11
152 movdqa xmm8, xmm0
153 movdqa xmm13, xmm1
154 pclmulqdq xmm0, xmm10, 0x0
155 pclmulqdq xmm8, xmm10 , 0x11
156 pclmulqdq xmm1, xmm10, 0x0
157 pclmulqdq xmm13, xmm10 , 0x11
158 pxor xmm0, xmm9
159 xorps xmm0, xmm8
160 pxor xmm1, xmm12
161 xorps xmm1, xmm13
162
224ce89b 163 prefetchnta [arg2+fetch_dist+32]
7c673cae
FG
164 movdqu xmm9, [arg2+16*2]
165 movdqu xmm12, [arg2+16*3]
166 pshufb xmm9, xmm11
167 pshufb xmm12, xmm11
168 movdqa xmm8, xmm2
169 movdqa xmm13, xmm3
170 pclmulqdq xmm2, xmm10, 0x0
171 pclmulqdq xmm8, xmm10 , 0x11
172 pclmulqdq xmm3, xmm10, 0x0
173 pclmulqdq xmm13, xmm10 , 0x11
174 pxor xmm2, xmm9
175 xorps xmm2, xmm8
176 pxor xmm3, xmm12
177 xorps xmm3, xmm13
178
224ce89b 179 prefetchnta [arg2+fetch_dist+64]
7c673cae
FG
180 movdqu xmm9, [arg2+16*4]
181 movdqu xmm12, [arg2+16*5]
182 pshufb xmm9, xmm11
183 pshufb xmm12, xmm11
184 movdqa xmm8, xmm4
185 movdqa xmm13, xmm5
186 pclmulqdq xmm4, xmm10, 0x0
187 pclmulqdq xmm8, xmm10 , 0x11
188 pclmulqdq xmm5, xmm10, 0x0
189 pclmulqdq xmm13, xmm10 , 0x11
190 pxor xmm4, xmm9
191 xorps xmm4, xmm8
192 pxor xmm5, xmm12
193 xorps xmm5, xmm13
194
224ce89b 195 prefetchnta [arg2+fetch_dist+96]
7c673cae
FG
196 movdqu xmm9, [arg2+16*6]
197 movdqu xmm12, [arg2+16*7]
198 pshufb xmm9, xmm11
199 pshufb xmm12, xmm11
200 movdqa xmm8, xmm6
201 movdqa xmm13, xmm7
202 pclmulqdq xmm6, xmm10, 0x0
203 pclmulqdq xmm8, xmm10 , 0x11
204 pclmulqdq xmm7, xmm10, 0x0
205 pclmulqdq xmm13, xmm10 , 0x11
206 pxor xmm6, xmm9
207 xorps xmm6, xmm8
208 pxor xmm7, xmm12
209 xorps xmm7, xmm13
210
211 sub arg3, 128
212
213 ; check if there is another 128B in the buffer to be able to fold
214 jge _fold_128_B_loop
215 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
216
217
218 add arg2, 128
219 ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
220 ; the 128 of folded data is in 4 of the xmm registers: xmm0, xmm1, xmm2, xmm3
221
222
223 ; fold the 8 xmm registers to 1 xmm register with different constants
224
225 movdqa xmm10, [rk9]
226 movdqa xmm8, xmm0
227 pclmulqdq xmm0, xmm10, 0x11
228 pclmulqdq xmm8, xmm10, 0x0
229 pxor xmm7, xmm8
230 xorps xmm7, xmm0
231
232 movdqa xmm10, [rk11]
233 movdqa xmm8, xmm1
234 pclmulqdq xmm1, xmm10, 0x11
235 pclmulqdq xmm8, xmm10, 0x0
236 pxor xmm7, xmm8
237 xorps xmm7, xmm1
238
239 movdqa xmm10, [rk13]
240 movdqa xmm8, xmm2
241 pclmulqdq xmm2, xmm10, 0x11
242 pclmulqdq xmm8, xmm10, 0x0
243 pxor xmm7, xmm8
244 pxor xmm7, xmm2
245
246 movdqa xmm10, [rk15]
247 movdqa xmm8, xmm3
248 pclmulqdq xmm3, xmm10, 0x11
249 pclmulqdq xmm8, xmm10, 0x0
250 pxor xmm7, xmm8
251 xorps xmm7, xmm3
252
253 movdqa xmm10, [rk17]
254 movdqa xmm8, xmm4
255 pclmulqdq xmm4, xmm10, 0x11
256 pclmulqdq xmm8, xmm10, 0x0
257 pxor xmm7, xmm8
258 pxor xmm7, xmm4
259
260 movdqa xmm10, [rk19]
261 movdqa xmm8, xmm5
262 pclmulqdq xmm5, xmm10, 0x11
263 pclmulqdq xmm8, xmm10, 0x0
264 pxor xmm7, xmm8
265 xorps xmm7, xmm5
266
267 movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2
268 ;imm value of pclmulqdq instruction will determine which constant to use
269 movdqa xmm8, xmm6
270 pclmulqdq xmm6, xmm10, 0x11
271 pclmulqdq xmm8, xmm10, 0x0
272 pxor xmm7, xmm8
273 pxor xmm7, xmm6
274
275
276 ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
277 ; instead of a cmp instruction, we use the negative flag with the jl instruction
278 add arg3, 128-16
279 jl _final_reduction_for_128
280
281 ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
282 ; we can fold 16 bytes at a time if y>=16
283 ; continue folding 16B at a time
284
285_16B_reduction_loop:
286 movdqa xmm8, xmm7
287 pclmulqdq xmm7, xmm10, 0x11
288 pclmulqdq xmm8, xmm10, 0x0
289 pxor xmm7, xmm8
290 movdqu xmm0, [arg2]
291 pshufb xmm0, xmm11
292 pxor xmm7, xmm0
293 add arg2, 16
294 sub arg3, 16
295 ; instead of a cmp instruction, we utilize the flags with the jge instruction
296 ; equivalent of: cmp arg3, 16-16
297 ; check if there is any more 16B in the buffer to be able to fold
298 jge _16B_reduction_loop
299
300 ;now we have 16+z bytes left to reduce, where 0<= z < 16.
301 ;first, we reduce the data in the xmm7 register
302
303
304_final_reduction_for_128:
305 ; check if any more data to fold. If not, compute the CRC of the final 128 bits
306 add arg3, 16
307 je _128_done
308
309 ; here we are getting data that is less than 16 bytes.
310 ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
311 ; after that the registers need to be adjusted.
312_get_last_two_xmms:
313 movdqa xmm2, xmm7
314
315 movdqu xmm1, [arg2 - 16 + arg3]
316 pshufb xmm1, xmm11
317
318 ; get rid of the extra data that was loaded before
319 ; load the shift constant
320 lea rax, [pshufb_shf_table + 16]
321 sub rax, arg3
322 movdqu xmm0, [rax]
323
324 ; shift xmm2 to the left by arg3 bytes
325 pshufb xmm2, xmm0
326
327 ; shift xmm7 to the right by 16-arg3 bytes
328 pxor xmm0, [mask1]
329 pshufb xmm7, xmm0
330 pblendvb xmm1, xmm2 ;xmm0 is implicit
331
332 ; fold 16 Bytes
333 movdqa xmm2, xmm1
334 movdqa xmm8, xmm7
335 pclmulqdq xmm7, xmm10, 0x11
336 pclmulqdq xmm8, xmm10, 0x0
337 pxor xmm7, xmm8
338 pxor xmm7, xmm2
339
340_128_done:
341 ; compute crc of a 128-bit value
342 movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10
343 movdqa xmm0, xmm7
344
345 ;64b fold
346 pclmulqdq xmm7, xmm10, 0x1
347 pslldq xmm0, 8
348 pxor xmm7, xmm0
349
350 ;32b fold
351 movdqa xmm0, xmm7
352
353 pand xmm0, [mask2]
354
355 psrldq xmm7, 12
356 pclmulqdq xmm7, xmm10, 0x10
357 pxor xmm7, xmm0
358
359 ;barrett reduction
360_barrett:
361 movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
362 movdqa xmm0, xmm7
363 pclmulqdq xmm7, xmm10, 0x01
364 pslldq xmm7, 4
365 pclmulqdq xmm7, xmm10, 0x11
366
367 pslldq xmm7, 4
368 pxor xmm7, xmm0
369 pextrd eax, xmm7,1
370
371_cleanup:
372 not eax
373%ifidn __OUTPUT_FORMAT__, win64
374 movdqa xmm6, [rsp + XMM_SAVE + 16*0]
375 movdqa xmm7, [rsp + XMM_SAVE + 16*1]
376 movdqa xmm8, [rsp + XMM_SAVE + 16*2]
377 movdqa xmm9, [rsp + XMM_SAVE + 16*3]
378 movdqa xmm10, [rsp + XMM_SAVE + 16*4]
379 movdqa xmm11, [rsp + XMM_SAVE + 16*5]
380 movdqa xmm12, [rsp + XMM_SAVE + 16*6]
381 movdqa xmm13, [rsp + XMM_SAVE + 16*7]
382%endif
383 add rsp,VARIABLE_OFFSET
384 ret
385
386
387;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
388;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
389;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
390;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
391
; ---- short-buffer paths (len < 256) ------------------------------------
; dispatch: 32 <= len < 256 -> seed xmm7 and join _16B_reduction_loop;
;           len == 16       -> _exact_16_left;  0 < len < 16 -> staged via
;           stack; len == 0 -> return ~init_crc unchanged via _cleanup.
392align 16
393_less_than_256:
394
395 ; check if there is enough buffer to be able to fold 16B at a time
396 cmp arg3, 32
397 jl _less_than_32
398 movdqa xmm11, [SHUF_MASK]
399
400 ; if there is, load the constants
401 movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
402
403 movd xmm0, arg1_low32 ; get the initial crc value
404 pslldq xmm0, 12 ; align it to its correct place
405 movdqu xmm7, [arg2] ; load the plaintext
406 pshufb xmm7, xmm11 ; byte-reflect the plaintext
407 pxor xmm7, xmm0
408
409
410 ; update the buffer pointer
411 add arg2, 16
412
413 ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
414 sub arg3, 32
415
416 jmp _16B_reduction_loop
417
418
419align 16
420_less_than_32:
421 ; mov initial crc to the return value. this is necessary for zero-length buffers.
422 mov eax, arg1_low32
423 test arg3, arg3
424 je _cleanup
425
426 movdqa xmm11, [SHUF_MASK]
427
428 movd xmm0, arg1_low32 ; get the initial crc value
429 pslldq xmm0, 12 ; align it to its correct place
430
431 cmp arg3, 16
432 je _exact_16_left
433 jl _less_than_16_left
434
435 movdqu xmm7, [arg2] ; load the plaintext
436 pshufb xmm7, xmm11 ; byte-reflect the plaintext
437 pxor xmm7, xmm0 ; xor the initial crc value
438 add arg2, 16
439 sub arg3, 16
440 movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
441 jmp _get_last_two_xmms
442
443
444align 16
445_less_than_16_left:
446 ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
; copy the tail into the zeroed 16B scratch at [rsp] in descending power-of-2
; chunks (8/4/2/1), tracked by r11 (write cursor) and r9 (original length)
447
448 pxor xmm1, xmm1
449 mov r11, rsp
450 movdqa [r11], xmm1
451
452 cmp arg3, 4
453 jl _only_less_than_4
454
455 ; backup the counter value
456 mov r9, arg3
457 cmp arg3, 8
458 jl _less_than_8_left
459
460 ; load 8 Bytes
461 mov rax, [arg2]
462 mov [r11], rax
463 add r11, 8
464 sub arg3, 8
465 add arg2, 8
466_less_than_8_left:
467
468 cmp arg3, 4
469 jl _less_than_4_left
470
471 ; load 4 Bytes
472 mov eax, [arg2]
473 mov [r11], eax
474 add r11, 4
475 sub arg3, 4
476 add arg2, 4
477_less_than_4_left:
478
479 cmp arg3, 2
480 jl _less_than_2_left
481
482 ; load 2 Bytes
483 mov ax, [arg2]
484 mov [r11], ax
485 add r11, 2
486 sub arg3, 2
487 add arg2, 2
488_less_than_2_left:
489 cmp arg3, 1
490 jl _zero_left
491
492 ; load 1 Byte
493 mov al, [arg2]
494 mov [r11], al
495_zero_left:
496 movdqa xmm7, [rsp]
497 pshufb xmm7, xmm11
498 pxor xmm7, xmm0 ; xor the initial crc value
499
500 ; shl r9, 4
501 lea rax, [pshufb_shf_table + 16]
502 sub rax, r9
503 movdqu xmm0, [rax]
504 pxor xmm0, [mask1]
505
506 pshufb xmm7, xmm0
507 jmp _128_done
508
509align 16
510_exact_16_left:
511 movdqu xmm7, [arg2]
512 pshufb xmm7, xmm11
513 pxor xmm7, xmm0 ; xor the initial crc value
514
515 jmp _128_done
516
; 1..3 byte inputs: stage bytes on the stack, reflect, then shift so the
; value sits where the Barrett reduction expects it (psrldq 5/6/7)
517_only_less_than_4:
518 cmp arg3, 3
519 jl _only_less_than_3
520
521 ; load 3 Bytes
522 mov al, [arg2]
523 mov [r11], al
524
525 mov al, [arg2+1]
526 mov [r11+1], al
527
528 mov al, [arg2+2]
529 mov [r11+2], al
530
531 movdqa xmm7, [rsp]
532 pshufb xmm7, xmm11
533 pxor xmm7, xmm0 ; xor the initial crc value
534
535 psrldq xmm7, 5
536
537 jmp _barrett
538_only_less_than_3:
539 cmp arg3, 2
540 jl _only_less_than_2
541
542 ; load 2 Bytes
543 mov al, [arg2]
544 mov [r11], al
545
546 mov al, [arg2+1]
547 mov [r11+1], al
548
549 movdqa xmm7, [rsp]
550 pshufb xmm7, xmm11
551 pxor xmm7, xmm0 ; xor the initial crc value
552
553 psrldq xmm7, 6
554
555 jmp _barrett
556_only_less_than_2:
557
558 ; load 1 Byte
559 mov al, [arg2]
560 mov [r11], al
561
562 movdqa xmm7, [rsp]
563 pshufb xmm7, xmm11
564 pxor xmm7, xmm0 ; xor the initial crc value
565
566 psrldq xmm7, 7
567
568 jmp _barrett
569
570section .data
571
572; precomputed constants
; rk1..rk20: pclmulqdq folding multiplicands for the CRC-32 IEEE polynomial,
; one pair per fold distance used above (rk1/rk2: 16B; rk3/rk4: 128B loop;
; rk5/rk6: final 64b/32b folds; rk9..rk20: the 8->1 register reduction).
; rk7/rk8 are the Barrett-reduction pair; rk8 = 0x104c11db7 is the
; CRC-32 (IEEE 802.3) polynomial itself.
573align 16
574
575rk1 :
576DQ 0xf200aa6600000000
577rk2 :
578DQ 0x17d3315d00000000
579rk3 :
580DQ 0x022ffca500000000
581rk4 :
582DQ 0x9d9ee22f00000000
583rk5 :
584DQ 0xf200aa6600000000
585rk6 :
586DQ 0x490d678d00000000
587rk7 :
588DQ 0x0000000104d101df
589rk8 :
590DQ 0x0000000104c11db7
591rk9 :
592DQ 0x6ac7e7d700000000
593rk10 :
594DQ 0xfcd922af00000000
595rk11 :
596DQ 0x34e45a6300000000
597rk12 :
598DQ 0x8762c1f600000000
599rk13 :
600DQ 0x5395a0ea00000000
601rk14 :
602DQ 0x54f2d5c700000000
603rk15 :
604DQ 0xd3504ec700000000
605rk16 :
606DQ 0x57a8445500000000
607rk17 :
608DQ 0xc053585d00000000
609rk18 :
610DQ 0x766f1b7800000000
611rk19 :
612DQ 0xcd8c54b500000000
613rk20 :
614DQ 0xab40b71e00000000
615
616
617
618
619
620
621
622
623
; mask1: pshufb control with the high bit set in every byte (selects zero);
; XORed with a shf-table entry it converts a left-shift mask into the
; complementary right-shift mask
624mask1:
625dq 0x8080808080808080, 0x8080808080808080
; mask2: keeps the low 12 bytes, clears the top 4 (used in the 32b fold)
626mask2:
627dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
628
; SHUF_MASK: byte-reverses a 16B lane (big-endian <-> little-endian)
629SHUF_MASK:
630dq 0x08090A0B0C0D0E0F, 0x0001020304050607
631
632pshufb_shf_table:
633; use these values for shift constants for the pshufb instruction
634; different alignments result in values as shown:
635; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
636; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
637; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
638; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
639; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
640; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
641; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
642; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
643; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
644; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
645; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
646; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
647; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
648; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
649; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
650dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
651dq 0x0706050403020100, 0x000e0d0c0b0a0908
652
653;;; func core, ver, snum
654slversion crc32_ieee_01, 01, 06, 0011
655