;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;       Function API:
;       UINT32 crc32_ieee_01(
;               UINT32 init_crc,           //initial CRC value, 32 bits
;               const unsigned char *buf,  //buffer pointer to calculate CRC on
;               UINT64 len                 //buffer length in bytes (64-bit data)
;       );
;
;       Authors:
;               Erdinc Ozturk
;               Vinodh Gopal
;               James Guilford
;
;       Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
;       URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf

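;       For reference only (not assembled): a bit-at-a-time C sketch of the
;       same computation, assuming the usual non-reflected IEEE polynomial
;       0x04C11DB7 and the ~init_crc / ~result convention this routine uses.
;       The name crc32_ieee_ref is illustrative, not part of this library:
;
;       uint32_t crc32_ieee_ref(uint32_t init_crc,
;                               const unsigned char *buf, uint64_t len)
;       {
;               uint32_t crc = ~init_crc;               /* 'not arg1_low32' */
;               while (len--) {
;                       crc ^= (uint32_t)*buf++ << 24;  /* MSB-first data   */
;                       for (int i = 0; i < 8; i++)
;                               crc = (crc & 0x80000000) ?
;                                     (crc << 1) ^ 0x04C11DB7 : (crc << 1);
;               }
;               return ~crc;                            /* 'not eax'        */
;       }
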
%include "reg_sizes.asm"

[bits 64]
default rel

section .text

%ifidn __OUTPUT_FORMAT__, win64
        %xdefine        arg1 rcx
        %xdefine        arg2 rdx
        %xdefine        arg3 r8

        %xdefine        arg1_low32 ecx
%else
        %xdefine        arg1 rdi
        %xdefine        arg2 rsi
        %xdefine        arg3 rdx

        %xdefine        arg1_low32 edi
%endif

%define TMP 16*0
%ifidn __OUTPUT_FORMAT__, win64
        %define XMM_SAVE 16*2
        %define VARIABLE_OFFSET 16*10+8
%else
        %define VARIABLE_OFFSET 16*2+8
%endif
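
; Stack frame: [rsp + TMP] is 16 bytes of scratch space, used below to
; assemble tails shorter than 16 bytes; on win64, [rsp + XMM_SAVE] holds
; xmm6-xmm13, which the Microsoft x64 calling convention requires callees
; to preserve.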
align 16
global crc32_ieee_01:function
crc32_ieee_01:

        not     arg1_low32      ;~init_crc

        sub     rsp, VARIABLE_OFFSET

%ifidn __OUTPUT_FORMAT__, win64
        ; save the xmm registers on the stack to maintain the win64 ABI
        ; (xmm6-xmm13 are callee-saved)
        movdqa  [rsp + XMM_SAVE + 16*0], xmm6
        movdqa  [rsp + XMM_SAVE + 16*1], xmm7
        movdqa  [rsp + XMM_SAVE + 16*2], xmm8
        movdqa  [rsp + XMM_SAVE + 16*3], xmm9
        movdqa  [rsp + XMM_SAVE + 16*4], xmm10
        movdqa  [rsp + XMM_SAVE + 16*5], xmm11
        movdqa  [rsp + XMM_SAVE + 16*6], xmm12
        movdqa  [rsp + XMM_SAVE + 16*7], xmm13
%endif


        ; check if the buffer is smaller than 256 bytes
        cmp     arg3, 256

        ; for sizes less than 256, we can't fold 128B at a time...
        jl      _less_than_256


        ; load the initial crc value
        movd    xmm10, arg1_low32       ; initial crc

        ; the crc value does not need to be byte-reflected, but it does need
        ; to be moved to the high part of the register, because the data will
        ; be byte-reflected and will align with the initial crc at the
        ; correct place
        pslldq  xmm10, 12

        movdqa  xmm11, [SHUF_MASK]
        ; receive the initial 128B data, xor the initial crc value
        movdqu  xmm0, [arg2+16*0]
        movdqu  xmm1, [arg2+16*1]
        movdqu  xmm2, [arg2+16*2]
        movdqu  xmm3, [arg2+16*3]
        movdqu  xmm4, [arg2+16*4]
        movdqu  xmm5, [arg2+16*5]
        movdqu  xmm6, [arg2+16*6]
        movdqu  xmm7, [arg2+16*7]

        pshufb  xmm0, xmm11
        ; XOR the initial_crc value
        pxor    xmm0, xmm10
        pshufb  xmm1, xmm11
        pshufb  xmm2, xmm11
        pshufb  xmm3, xmm11
        pshufb  xmm4, xmm11
        pshufb  xmm5, xmm11
        pshufb  xmm6, xmm11
        pshufb  xmm7, xmm11
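
        ; SHUF_MASK reverses the byte order of each register (result byte i =
        ; source byte 15-i), so the first message byte lands in the most
        ; significant byte: the data is loaded little-endian, but the CRC uses
        ; the non-reflected (MSB-first) bit ordering of the IEEE polynomial.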

        movdqa  xmm10, [rk3]    ;xmm10 has rk3 and rk4
                                ;imm value of pclmulqdq instruction will determine which constant to use
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ; we subtract 256 instead of 128 to save one instruction from the loop
        sub     arg3, 256

        ; at this point there are 128*x + y (0 <= y < 128) bytes in the
        ; buffer; the _fold_128_B_loop will fold 128B at a time until
        ; 128 + y bytes remain


        ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
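        ; Folding identity (see the reference paper): writing a 128-bit chunk
        ; as D = H*x^64 + L, advancing it by n bits gives
        ;       D*x^n mod P(x) = H*(x^(n+64) mod P) xor L*(x^n mod P) (mod P)
        ; Each pclmulqdq below computes one such 64x64 carry-less product; the
        ; imm8 selects the qwords (0x00 = both low halves, 0x11 = both high).
        ; pxor and xorps are interchangeable on this integer data; mixing them
        ; is presumably a scheduling/encoding choice.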
_fold_128_B_loop:

        ; update the buffer pointer
        add     arg2, 128       ; buf += 128;

        movdqu  xmm9, [arg2+16*0]
        movdqu  xmm12, [arg2+16*1]
        pshufb  xmm9, xmm11
        pshufb  xmm12, xmm11
        movdqa  xmm8, xmm0
        movdqa  xmm13, xmm1
        pclmulqdq       xmm0, xmm10, 0x0
        pclmulqdq       xmm8, xmm10, 0x11
        pclmulqdq       xmm1, xmm10, 0x0
        pclmulqdq       xmm13, xmm10, 0x11
        pxor    xmm0, xmm9
        xorps   xmm0, xmm8
        pxor    xmm1, xmm12
        xorps   xmm1, xmm13

        movdqu  xmm9, [arg2+16*2]
        movdqu  xmm12, [arg2+16*3]
        pshufb  xmm9, xmm11
        pshufb  xmm12, xmm11
        movdqa  xmm8, xmm2
        movdqa  xmm13, xmm3
        pclmulqdq       xmm2, xmm10, 0x0
        pclmulqdq       xmm8, xmm10, 0x11
        pclmulqdq       xmm3, xmm10, 0x0
        pclmulqdq       xmm13, xmm10, 0x11
        pxor    xmm2, xmm9
        xorps   xmm2, xmm8
        pxor    xmm3, xmm12
        xorps   xmm3, xmm13

        movdqu  xmm9, [arg2+16*4]
        movdqu  xmm12, [arg2+16*5]
        pshufb  xmm9, xmm11
        pshufb  xmm12, xmm11
        movdqa  xmm8, xmm4
        movdqa  xmm13, xmm5
        pclmulqdq       xmm4, xmm10, 0x0
        pclmulqdq       xmm8, xmm10, 0x11
        pclmulqdq       xmm5, xmm10, 0x0
        pclmulqdq       xmm13, xmm10, 0x11
        pxor    xmm4, xmm9
        xorps   xmm4, xmm8
        pxor    xmm5, xmm12
        xorps   xmm5, xmm13

        movdqu  xmm9, [arg2+16*6]
        movdqu  xmm12, [arg2+16*7]
        pshufb  xmm9, xmm11
        pshufb  xmm12, xmm11
        movdqa  xmm8, xmm6
        movdqa  xmm13, xmm7
        pclmulqdq       xmm6, xmm10, 0x0
        pclmulqdq       xmm8, xmm10, 0x11
        pclmulqdq       xmm7, xmm10, 0x0
        pclmulqdq       xmm13, xmm10, 0x11
        pxor    xmm6, xmm9
        xorps   xmm6, xmm8
        pxor    xmm7, xmm12
        xorps   xmm7, xmm13

        sub     arg3, 128

        ; check if there is another 128B in the buffer to be able to fold
        jge     _fold_128_B_loop
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


        add     arg2, 128
        ; at this point, the buffer pointer is pointing at the last y bytes
        ; of the buffer; the 128B of folded data is in 8 of the xmm
        ; registers: xmm0 through xmm7


        ; fold the 8 xmm registers to 1 xmm register with different constants

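        ; xmm0..xmm6 sit 112, 96, 80, 64, 48, 32 and 16 bytes ahead of xmm7,
        ; so each needs its own x^n mod P(x) constant pair (rk9/rk10 through
        ; rk19/rk20, then rk1/rk2 for the final 16-byte distance); rk1/rk2
        ; are loaded last so they remain in xmm10 for _16B_reduction_loop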
        movdqa  xmm10, [rk9]
        movdqa  xmm8, xmm0
        pclmulqdq       xmm0, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        xorps   xmm7, xmm0

        movdqa  xmm10, [rk11]
        movdqa  xmm8, xmm1
        pclmulqdq       xmm1, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        xorps   xmm7, xmm1

        movdqa  xmm10, [rk13]
        movdqa  xmm8, xmm2
        pclmulqdq       xmm2, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        pxor    xmm7, xmm2

        movdqa  xmm10, [rk15]
        movdqa  xmm8, xmm3
        pclmulqdq       xmm3, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        xorps   xmm7, xmm3

        movdqa  xmm10, [rk17]
        movdqa  xmm8, xmm4
        pclmulqdq       xmm4, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        pxor    xmm7, xmm4

        movdqa  xmm10, [rk19]
        movdqa  xmm8, xmm5
        pclmulqdq       xmm5, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        xorps   xmm7, xmm5

        movdqa  xmm10, [rk1]    ;xmm10 has rk1 and rk2
                                ;imm value of pclmulqdq instruction will determine which constant to use
        movdqa  xmm8, xmm6
        pclmulqdq       xmm6, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        pxor    xmm7, xmm6


        ; instead of 128, we add 112 (= 128-16) to the loop counter to save
        ; one instruction from the loop
        ; instead of a cmp instruction, we use the sign flag with the jl
        ; instruction
        add     arg3, 128-16
        jl      _final_reduction_for_128

        ; now we have 16+y bytes left to reduce. 16 bytes are in register
        ; xmm7 and the rest is in memory; we can fold 16 bytes at a time if
        ; y >= 16, so continue folding 16B at a time

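        ; each iteration folds xmm7 forward over the next 16 input bytes:
        ; xmm7 = H(xmm7)*rk2 xor L(xmm7)*rk1 xor next_16B, keeping the
        ; running residue in a single register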
_16B_reduction_loop:
        movdqa  xmm8, xmm7
        pclmulqdq       xmm7, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        movdqu  xmm0, [arg2]
        pshufb  xmm0, xmm11
        pxor    xmm7, xmm0
        add     arg2, 16
        sub     arg3, 16
        ; instead of a cmp instruction, we utilize the flags with the jge
        ; instruction; equivalent of: cmp arg3, 16-16
        ; check if there are any more 16B blocks in the buffer to fold
        jge     _16B_reduction_loop

        ; now we have 16+z bytes left to reduce, where 0 <= z < 16.
        ; first, we reduce the data in the xmm7 register


_final_reduction_for_128:
        ; check if any more data to fold. If not, compute the CRC of the final 128 bits
        add     arg3, 16
        je      _128_done

        ; here we are handling a tail of fewer than 16 bytes.
        ; since we know there was valid data before the current pointer, we
        ; can offset the input pointer backward to load exactly 16 bytes;
        ; afterwards, the registers need to be adjusted.
_get_last_two_xmms:
        movdqa  xmm2, xmm7

        movdqu  xmm1, [arg2 - 16 + arg3]
        pshufb  xmm1, xmm11

        ; get rid of the extra data that was loaded before
        ; load the shift constant
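        ; [pshufb_shf_table + 16 - arg3] is a pshufb mask that shifts a
        ; register left by arg3 bytes; xoring it with mask1 (0x80 in every
        ; byte) turns it into a shift right by (16 - arg3) bytes, since
        ; pshufb zeroes any destination byte whose mask byte has its high
        ; bit set.  pblendvb then merges the two registers byte-by-byte
        ; according to the sign bits of xmm0.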
        lea     rax, [pshufb_shf_table + 16]
        sub     rax, arg3
        movdqu  xmm0, [rax]

        ; shift xmm2 to the left by arg3 bytes
        pshufb  xmm2, xmm0

        ; shift xmm7 to the right by 16-arg3 bytes
        pxor    xmm0, [mask1]
        pshufb  xmm7, xmm0
        pblendvb        xmm1, xmm2      ;xmm0 is implicit

        ; fold 16 Bytes
        movdqa  xmm2, xmm1
        movdqa  xmm8, xmm7
        pclmulqdq       xmm7, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        pxor    xmm7, xmm2

_128_done:
        ; compute crc of a 128-bit value
        movdqa  xmm10, [rk5]    ; rk5 and rk6 in xmm10
        movdqa  xmm0, xmm7

        ;64b fold
        pclmulqdq       xmm7, xmm10, 0x1
        pslldq  xmm0, 8
        pxor    xmm7, xmm0

        ;32b fold
        movdqa  xmm0, xmm7

        pand    xmm0, [mask2]

        psrldq  xmm7, 12
        pclmulqdq       xmm7, xmm10, 0x10
        pxor    xmm7, xmm0

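        ; Barrett reduction for polynomials, as described in the reference
        ; paper: with R the remaining value, mu = floor(x^64 / P(x)) (rk7)
        ; and P(x) including its x^32 term (rk8), compute
        ;       T1 = floor(R / x^32) * mu
        ;       T2 = floor(T1 / x^32) * P
        ;       crc = (R xor T2) mod x^32
        ; the byte shifts and the pextrd implement the divisions and the
        ; final mod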
        ;barrett reduction
_barrett:
        movdqa  xmm10, [rk7]    ; rk7 and rk8 in xmm10
        movdqa  xmm0, xmm7
        pclmulqdq       xmm7, xmm10, 0x01
        pslldq  xmm7, 4
        pclmulqdq       xmm7, xmm10, 0x11

        pslldq  xmm7, 4
        pxor    xmm7, xmm0
        pextrd  eax, xmm7, 1

_cleanup:
        not     eax
%ifidn __OUTPUT_FORMAT__, win64
        movdqa  xmm6, [rsp + XMM_SAVE + 16*0]
        movdqa  xmm7, [rsp + XMM_SAVE + 16*1]
        movdqa  xmm8, [rsp + XMM_SAVE + 16*2]
        movdqa  xmm9, [rsp + XMM_SAVE + 16*3]
        movdqa  xmm10, [rsp + XMM_SAVE + 16*4]
        movdqa  xmm11, [rsp + XMM_SAVE + 16*5]
        movdqa  xmm12, [rsp + XMM_SAVE + 16*6]
        movdqa  xmm13, [rsp + XMM_SAVE + 16*7]
%endif
        add     rsp, VARIABLE_OFFSET
        ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

align 16
_less_than_256:

        ; check if there is enough buffer to be able to fold 16B at a time
        cmp     arg3, 32
        jl      _less_than_32
        movdqa  xmm11, [SHUF_MASK]

        ; if there is, load the constants
        movdqa  xmm10, [rk1]    ; rk1 and rk2 in xmm10

        movd    xmm0, arg1_low32        ; get the initial crc value
        pslldq  xmm0, 12                ; align it to its correct place
        movdqu  xmm7, [arg2]            ; load the plaintext
        pshufb  xmm7, xmm11             ; byte-reflect the plaintext
        pxor    xmm7, xmm0


        ; update the buffer pointer
        add     arg2, 16

        ; update the counter. subtract 32 instead of 16 to save one
        ; instruction from the loop
        sub     arg3, 32

        jmp     _16B_reduction_loop


align 16
_less_than_32:
        ; mov initial crc to the return value. this is necessary for
        ; zero-length buffers.
        mov     eax, arg1_low32
        test    arg3, arg3
        je      _cleanup

        movdqa  xmm11, [SHUF_MASK]

        movd    xmm0, arg1_low32        ; get the initial crc value
        pslldq  xmm0, 12                ; align it to its correct place

        cmp     arg3, 16
        je      _exact_16_left
        jl      _less_than_16_left

        movdqu  xmm7, [arg2]            ; load the plaintext
        pshufb  xmm7, xmm11             ; byte-reflect the plaintext
        pxor    xmm7, xmm0              ; xor the initial crc value
        add     arg2, 16
        sub     arg3, 16
        movdqa  xmm10, [rk1]            ; rk1 and rk2 in xmm10
        jmp     _get_last_two_xmms


align 16
_less_than_16_left:
        ; use stack space to assemble data shorter than 16 bytes; zero out
        ; the 16B of stack first

        pxor    xmm1, xmm1
        mov     r11, rsp
        movdqa  [r11], xmm1
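        ; (copying through the stack avoids a 16-byte load that could read
        ; past the end of the caller's buffer and fault at a page boundary)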

        cmp     arg3, 4
        jl      _only_less_than_4

        ; backup the counter value
        mov     r9, arg3
        cmp     arg3, 8
        jl      _less_than_8_left

        ; load 8 Bytes
        mov     rax, [arg2]
        mov     [r11], rax
        add     r11, 8
        sub     arg3, 8
        add     arg2, 8
_less_than_8_left:

        cmp     arg3, 4
        jl      _less_than_4_left

        ; load 4 Bytes
        mov     eax, [arg2]
        mov     [r11], eax
        add     r11, 4
        sub     arg3, 4
        add     arg2, 4
_less_than_4_left:

        cmp     arg3, 2
        jl      _less_than_2_left

        ; load 2 Bytes
        mov     ax, [arg2]
        mov     [r11], ax
        add     r11, 2
        sub     arg3, 2
        add     arg2, 2
_less_than_2_left:
        cmp     arg3, 1
        jl      _zero_left

        ; load 1 Byte
        mov     al, [arg2]
        mov     [r11], al
_zero_left:
        movdqa  xmm7, [rsp]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0      ; xor the initial crc value

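        ; r9 still holds the original tail length: the masked pshufb below
        ; shifts xmm7 right by (16 - r9) bytes so that only the r9 message
        ; bytes (plus the crc) enter the final reduction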
        ; shl r9, 4
        lea     rax, [pshufb_shf_table + 16]
        sub     rax, r9
        movdqu  xmm0, [rax]
        pxor    xmm0, [mask1]

        pshufb  xmm7, xmm0
        jmp     _128_done

align 16
_exact_16_left:
        movdqu  xmm7, [arg2]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0      ; xor the initial crc value

        jmp     _128_done

_only_less_than_4:
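        ; in the three paths below, the byte-reflected value is shifted right
        ; by (8 - len) bytes (psrldq 5/6/7 for len 3/2/1) so the short
        ; message+crc product lines up where _barrett expects its input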
        cmp     arg3, 3
        jl      _only_less_than_3

        ; load 3 Bytes
        mov     al, [arg2]
        mov     [r11], al

        mov     al, [arg2+1]
        mov     [r11+1], al

        mov     al, [arg2+2]
        mov     [r11+2], al

        movdqa  xmm7, [rsp]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0      ; xor the initial crc value

        psrldq  xmm7, 5

        jmp     _barrett
_only_less_than_3:
        cmp     arg3, 2
        jl      _only_less_than_2

        ; load 2 Bytes
        mov     al, [arg2]
        mov     [r11], al

        mov     al, [arg2+1]
        mov     [r11+1], al

        movdqa  xmm7, [rsp]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0      ; xor the initial crc value

        psrldq  xmm7, 6

        jmp     _barrett
_only_less_than_2:

        ; load 1 Byte
        mov     al, [arg2]
        mov     [r11], al

        movdqa  xmm7, [rsp]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0      ; xor the initial crc value

        psrldq  xmm7, 7

        jmp     _barrett

section .data

; precomputed constants
align 16

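; rk1..rk6 and rk9..rk20 are (x^n mod P(x)) folding constants for the fold
; distances used above (rk1/rk2: 16B, rk3/rk4: 128B, rk5/rk6: the final
; 64-bit and 32-bit folds, rk9..rk20: the 112B-to-32B fold-down); rk7 is
; the Barrett constant floor(x^64 / P(x)) and rk8 is the polynomial P(x)
; itself including the x^32 term (0x104C11DB7)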
rk1:    DQ 0xf200aa6600000000
rk2:    DQ 0x17d3315d00000000
rk3:    DQ 0x022ffca500000000
rk4:    DQ 0x9d9ee22f00000000
rk5:    DQ 0xf200aa6600000000
rk6:    DQ 0x490d678d00000000
rk7:    DQ 0x0000000104d101df
rk8:    DQ 0x0000000104c11db7
rk9:    DQ 0x6ac7e7d700000000
rk10:   DQ 0xfcd922af00000000
rk11:   DQ 0x34e45a6300000000
rk12:   DQ 0x8762c1f600000000
rk13:   DQ 0x5395a0ea00000000
rk14:   DQ 0x54f2d5c700000000
rk15:   DQ 0xd3504ec700000000
rk16:   DQ 0x57a8445500000000
rk17:   DQ 0xc053585d00000000
rk18:   DQ 0x766f1b7800000000
rk19:   DQ 0xcd8c54b500000000
rk20:   DQ 0xab40b71e00000000

mask1:
        dq 0x8080808080808080, 0x8080808080808080
mask2:
        dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF

SHUF_MASK:
        dq 0x08090A0B0C0D0E0F, 0x0001020304050607
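        ; (byte-reversal mask: pshufb with SHUF_MASK gives result byte i =
        ; source byte 15-i)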

pshufb_shf_table:
; use these values for shift constants for the pshufb instruction
; different alignments result in values as shown:
;       dq 0x8887868584838281, 0x008f8e8d8c8b8a89      ; shl 15 (16-1)  / shr1
;       dq 0x8988878685848382, 0x01008f8e8d8c8b8a      ; shl 14 (16-2)  / shr2
;       dq 0x8a89888786858483, 0x0201008f8e8d8c8b      ; shl 13 (16-3)  / shr3
;       dq 0x8b8a898887868584, 0x030201008f8e8d8c      ; shl 12 (16-4)  / shr4
;       dq 0x8c8b8a8988878685, 0x04030201008f8e8d      ; shl 11 (16-5)  / shr5
;       dq 0x8d8c8b8a89888786, 0x0504030201008f8e      ; shl 10 (16-6)  / shr6
;       dq 0x8e8d8c8b8a898887, 0x060504030201008f      ; shl 9  (16-7)  / shr7
;       dq 0x8f8e8d8c8b8a8988, 0x0706050403020100      ; shl 8  (16-8)  / shr8
;       dq 0x008f8e8d8c8b8a89, 0x0807060504030201      ; shl 7  (16-9)  / shr9
;       dq 0x01008f8e8d8c8b8a, 0x0908070605040302      ; shl 6  (16-10) / shr10
;       dq 0x0201008f8e8d8c8b, 0x0a09080706050403      ; shl 5  (16-11) / shr11
;       dq 0x030201008f8e8d8c, 0x0b0a090807060504      ; shl 4  (16-12) / shr12
;       dq 0x04030201008f8e8d, 0x0c0b0a0908070605      ; shl 3  (16-13) / shr13
;       dq 0x0504030201008f8e, 0x0d0c0b0a09080706      ; shl 2  (16-14) / shr14
;       dq 0x060504030201008f, 0x0e0d0c0b0a090807      ; shl 1  (16-15) / shr15
        dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
        dq 0x0706050403020100, 0x000e0d0c0b0a0908

;;;       func             core, ver, snum
slversion crc32_ieee_01, 01, 06, 0011