;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;       Function API:
;       UINT16 crc16_t10dif_01(
;               UINT16 init_crc, //initial CRC value, 16 bits
;               const unsigned char *buf, //buffer pointer to calculate CRC on
;               UINT64 len //buffer length in bytes (64-bit data)
;       );
;
;       Authors:
;               Erdinc Ozturk
;               Vinodh Gopal
;               James Guilford
;
;       Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
;       URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
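;
;       Example call from C (a sketch; standard T10-DIF starts from an initial
;       CRC of 0, and this routine applies no final inversion):
;
;               uint16_t crc = crc16_t10dif_01(0, buf, len);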

%include "reg_sizes.asm"

%define	fetch_dist	1024

[bits 64]
default rel

section .text

%ifidn __OUTPUT_FORMAT__, win64
	%xdefine	arg1 rcx
	%xdefine	arg2 rdx
	%xdefine	arg3 r8

	%xdefine	arg1_low32 ecx
%else
	%xdefine	arg1 rdi
	%xdefine	arg2 rsi
	%xdefine	arg3 rdx

	%xdefine	arg1_low32 edi
%endif

%ifidn __OUTPUT_FORMAT__, win64
	%define	XMM_SAVE 16*2
	%define	VARIABLE_OFFSET 16*10+8
%else
	%define	VARIABLE_OFFSET 16*2+8
%endif
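
; Stack layout note (a sketch of the intent, inferred from the defines above):
; the trailing +8 re-establishes 16-byte alignment of rsp after the call, the
; 16-byte slots below XMM_SAVE serve as scratch for the short-buffer paths
; further down, and on win64 slots 16*2..16*9 hold the callee-saved registers
; xmm6-xmm13.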

align 16
global crc16_t10dif_01:function
crc16_t10dif_01:

	; adjust the 16-bit initial_crc value, scale it to 32 bits
	shl	arg1_low32, 16

	; After this point, the code flow is exactly the same as that of a 32-bit CRC.
	; The only difference is that before returning eax, we shift it right 16 bits to scale back to 16 bits.
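	; (A sketch of why this works: the fold and reduction constants below are
	; derived from the polynomial scaled to 32 bits, Q = 0x18BB70000, so the
	; 16-bit result lands in the upper half of the 32-bit value and the final
	; shr eax, 16 scales it back down.)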

	sub	rsp, VARIABLE_OFFSET
%ifidn __OUTPUT_FORMAT__, win64
	; save the xmm registers on the stack; they are callee-saved on win64
	movdqa	[rsp+16*2], xmm6
	movdqa	[rsp+16*3], xmm7
	movdqa	[rsp+16*4], xmm8
	movdqa	[rsp+16*5], xmm9
	movdqa	[rsp+16*6], xmm10
	movdqa	[rsp+16*7], xmm11
	movdqa	[rsp+16*8], xmm12
	movdqa	[rsp+16*9], xmm13
%endif

	; check if smaller than 256
	cmp	arg3, 256

	; for sizes less than 256, we can't fold 128B at a time...
	jl	_less_than_256


	; load the initial crc value
	movd	xmm10, arg1_low32	; initial crc

	; the crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register,
	; because the data will be byte-reflected and will align with the initial crc in the correct place
	pslldq	xmm10, 12

	movdqa	xmm11, [SHUF_MASK]
	; load the initial 128B of data and xor the initial crc value
	movdqu	xmm0, [arg2+16*0]
	movdqu	xmm1, [arg2+16*1]
	movdqu	xmm2, [arg2+16*2]
	movdqu	xmm3, [arg2+16*3]
	movdqu	xmm4, [arg2+16*4]
	movdqu	xmm5, [arg2+16*5]
	movdqu	xmm6, [arg2+16*6]
	movdqu	xmm7, [arg2+16*7]

	pshufb	xmm0, xmm11
	; XOR the initial_crc value
	pxor	xmm0, xmm10
	pshufb	xmm1, xmm11
	pshufb	xmm2, xmm11
	pshufb	xmm3, xmm11
	pshufb	xmm4, xmm11
	pshufb	xmm5, xmm11
	pshufb	xmm6, xmm11
	pshufb	xmm7, xmm11

	movdqa	xmm10, [rk3]	; xmm10 has rk3 and rk4
				; the imm value of the pclmulqdq instruction
				; selects which constant to use
	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	; we subtract 256 instead of 128 to save one instruction from the loop
	sub	arg3, 256

	; at this point, there are 128*x+y (0 <= y < 128) bytes of buffer left.
	; the _fold_128_B_loop will fold 128B at a time
	; until we have 128+y bytes of buffer left


	; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
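	; Folding identity used in the loop (a sketch): with xmm10 = rk4:rk3 and
	; a new 16-byte block D, each register A is advanced 128 bytes by
	;     A' = (A[63:0] clmul rk3) xor (A[127:64] clmul rk4) xor D
	; where rk3/rk4 are the 2^(32*15) / 2^(32*17) mod Q constants listed in
	; the data section.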
_fold_128_B_loop:

	; update the buffer pointer
	add	arg2, 128		; buf += 128;

	prefetchnta [arg2+fetch_dist+0]
	movdqu	xmm9, [arg2+16*0]
	movdqu	xmm12, [arg2+16*1]
	pshufb	xmm9, xmm11
	pshufb	xmm12, xmm11
	movdqa	xmm8, xmm0
	movdqa	xmm13, xmm1
	pclmulqdq	xmm0, xmm10, 0x0
	pclmulqdq	xmm8, xmm10, 0x11
	pclmulqdq	xmm1, xmm10, 0x0
	pclmulqdq	xmm13, xmm10, 0x11
	pxor	xmm0, xmm9
	xorps	xmm0, xmm8
	pxor	xmm1, xmm12
	xorps	xmm1, xmm13

	prefetchnta [arg2+fetch_dist+32]
	movdqu	xmm9, [arg2+16*2]
	movdqu	xmm12, [arg2+16*3]
	pshufb	xmm9, xmm11
	pshufb	xmm12, xmm11
	movdqa	xmm8, xmm2
	movdqa	xmm13, xmm3
	pclmulqdq	xmm2, xmm10, 0x0
	pclmulqdq	xmm8, xmm10, 0x11
	pclmulqdq	xmm3, xmm10, 0x0
	pclmulqdq	xmm13, xmm10, 0x11
	pxor	xmm2, xmm9
	xorps	xmm2, xmm8
	pxor	xmm3, xmm12
	xorps	xmm3, xmm13

	prefetchnta [arg2+fetch_dist+64]
	movdqu	xmm9, [arg2+16*4]
	movdqu	xmm12, [arg2+16*5]
	pshufb	xmm9, xmm11
	pshufb	xmm12, xmm11
	movdqa	xmm8, xmm4
	movdqa	xmm13, xmm5
	pclmulqdq	xmm4, xmm10, 0x0
	pclmulqdq	xmm8, xmm10, 0x11
	pclmulqdq	xmm5, xmm10, 0x0
	pclmulqdq	xmm13, xmm10, 0x11
	pxor	xmm4, xmm9
	xorps	xmm4, xmm8
	pxor	xmm5, xmm12
	xorps	xmm5, xmm13

	prefetchnta [arg2+fetch_dist+96]
	movdqu	xmm9, [arg2+16*6]
	movdqu	xmm12, [arg2+16*7]
	pshufb	xmm9, xmm11
	pshufb	xmm12, xmm11
	movdqa	xmm8, xmm6
	movdqa	xmm13, xmm7
	pclmulqdq	xmm6, xmm10, 0x0
	pclmulqdq	xmm8, xmm10, 0x11
	pclmulqdq	xmm7, xmm10, 0x0
	pclmulqdq	xmm13, xmm10, 0x11
	pxor	xmm6, xmm9
	xorps	xmm6, xmm8
	pxor	xmm7, xmm12
	xorps	xmm7, xmm13

	sub	arg3, 128

	; check if there is another 128B in the buffer to be able to fold
	jge	_fold_128_B_loop
	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


	add	arg2, 128
	; at this point, the buffer pointer is pointing at the last y bytes of the buffer
	; fold the 8 xmm registers to 1 xmm register with different constants

	movdqa	xmm10, [rk9]
	movdqa	xmm8, xmm0
	pclmulqdq	xmm0, xmm10, 0x11
	pclmulqdq	xmm8, xmm10, 0x0
	pxor	xmm7, xmm8
	xorps	xmm7, xmm0

	movdqa	xmm10, [rk11]
	movdqa	xmm8, xmm1
	pclmulqdq	xmm1, xmm10, 0x11
	pclmulqdq	xmm8, xmm10, 0x0
	pxor	xmm7, xmm8
	xorps	xmm7, xmm1

	movdqa	xmm10, [rk13]
	movdqa	xmm8, xmm2
	pclmulqdq	xmm2, xmm10, 0x11
	pclmulqdq	xmm8, xmm10, 0x0
	pxor	xmm7, xmm8
	pxor	xmm7, xmm2

	movdqa	xmm10, [rk15]
	movdqa	xmm8, xmm3
	pclmulqdq	xmm3, xmm10, 0x11
	pclmulqdq	xmm8, xmm10, 0x0
	pxor	xmm7, xmm8
	xorps	xmm7, xmm3

	movdqa	xmm10, [rk17]
	movdqa	xmm8, xmm4
	pclmulqdq	xmm4, xmm10, 0x11
	pclmulqdq	xmm8, xmm10, 0x0
	pxor	xmm7, xmm8
	pxor	xmm7, xmm4

	movdqa	xmm10, [rk19]
	movdqa	xmm8, xmm5
	pclmulqdq	xmm5, xmm10, 0x11
	pclmulqdq	xmm8, xmm10, 0x0
	pxor	xmm7, xmm8
	xorps	xmm7, xmm5

	movdqa	xmm10, [rk1]	; xmm10 has rk1 and rk2
				; the imm value of the pclmulqdq instruction
				; selects which constant to use
	movdqa	xmm8, xmm6
	pclmulqdq	xmm6, xmm10, 0x11
	pclmulqdq	xmm8, xmm10, 0x0
	pxor	xmm7, xmm8
	pxor	xmm7, xmm6


	; instead of 128, we add 112 (128-16) to the loop counter to save one instruction from the loop
	; instead of a cmp instruction, we use the negative flag with the jl instruction
	add	arg3, 128-16
	jl	_final_reduction_for_128

	; now we have 16+y bytes left to reduce; 16 bytes are in register xmm7 and the rest is in memory
	; we can fold 16 bytes at a time if y >= 16
	; continue folding 16B at a time
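	; Per iteration (a sketch): with xmm10 = rk2:rk1,
	;     xmm7' = (xmm7[63:0] clmul rk1) xor (xmm7[127:64] clmul rk2) xor next-16B
	; which advances the running remainder by exactly 16 bytes.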

_16B_reduction_loop:
	movdqa	xmm8, xmm7
	pclmulqdq	xmm7, xmm10, 0x11
	pclmulqdq	xmm8, xmm10, 0x0
	pxor	xmm7, xmm8
	movdqu	xmm0, [arg2]
	pshufb	xmm0, xmm11
	pxor	xmm7, xmm0
	add	arg2, 16
	sub	arg3, 16
	; instead of a cmp instruction, we utilize the flags with the jge instruction
	; equivalent of: cmp arg3, 16-16
	; check if there is any more 16B in the buffer to be able to fold
	jge	_16B_reduction_loop

	; now we have 16+z bytes left to reduce, where 0 <= z < 16.
	; first, we reduce the data in the xmm7 register


_final_reduction_for_128:
	; check if any more data to fold. If not, compute the CRC of the final 128 bits
	add	arg3, 16
	je	_128_done

	; here we are handling a tail of fewer than 16 bytes.
	; since we know there was data before the pointer, we can offset the input pointer back to load exactly 16 bytes.
	; after that, the registers need to be adjusted.
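	; Worked example (a sketch): for z = 3 trailing bytes, rax ends up at
	; pshufb_shf_table+13; the mask loaded there shifts xmm2 left by 3 bytes,
	; its complement (after the xor with mask1) shifts xmm7 right by 13 bytes,
	; and pblendvb stitches the two into one final 16-byte block to fold.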
_get_last_two_xmms:
	movdqa	xmm2, xmm7

	movdqu	xmm1, [arg2 - 16 + arg3]
	pshufb	xmm1, xmm11

	; get rid of the extra data that was loaded before
	; load the shift constant
	lea	rax, [pshufb_shf_table + 16]
	sub	rax, arg3
	movdqu	xmm0, [rax]

	; shift xmm2 to the left by arg3 bytes
	pshufb	xmm2, xmm0

	; shift xmm7 to the right by 16-arg3 bytes
	pxor	xmm0, [mask1]
	pshufb	xmm7, xmm0
	pblendvb	xmm1, xmm2	; xmm0 is implicit

	; fold 16 bytes
	movdqa	xmm2, xmm1
	movdqa	xmm8, xmm7
	pclmulqdq	xmm7, xmm10, 0x11
	pclmulqdq	xmm8, xmm10, 0x0
	pxor	xmm7, xmm8
	pxor	xmm7, xmm2

_128_done:
	; compute crc of a 128-bit value
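	; Reduction plan (a sketch): first fold the upper 64 bits onto the lower
	; 64 using rk5, then fold the upper 32 bits of that result using rk6,
	; leaving a value the Barrett step below can reduce to the final CRC.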
	movdqa	xmm10, [rk5]	; rk5 and rk6 in xmm10
	movdqa	xmm0, xmm7

	; 64b fold
	pclmulqdq	xmm7, xmm10, 0x1
	pslldq	xmm0, 8
	pxor	xmm7, xmm0

	; 32b fold
	movdqa	xmm0, xmm7

	pand	xmm0, [mask2]

	psrldq	xmm7, 12
	pclmulqdq	xmm7, xmm10, 0x10
	pxor	xmm7, xmm0

	; barrett reduction
_barrett:
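	; Barrett reduction (a sketch): with mu = rk7 = floor(2^64/Q) and Q = rk8,
	; compute T = floor(R/x^32) * mu, then C = floor(T/x^32) * Q; R xor C is
	; the remainder, and pextrd picks the dword holding the 32-bit result.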
	movdqa	xmm10, [rk7]	; rk7 and rk8 in xmm10
	movdqa	xmm0, xmm7
	pclmulqdq	xmm7, xmm10, 0x01
	pslldq	xmm7, 4
	pclmulqdq	xmm7, xmm10, 0x11

	pslldq	xmm7, 4
	pxor	xmm7, xmm0
	pextrd	eax, xmm7, 1

_cleanup:
	; scale the result back to 16 bits
	shr	eax, 16
%ifidn __OUTPUT_FORMAT__, win64
	movdqa	xmm6, [rsp+16*2]
	movdqa	xmm7, [rsp+16*3]
	movdqa	xmm8, [rsp+16*4]
	movdqa	xmm9, [rsp+16*5]
	movdqa	xmm10, [rsp+16*6]
	movdqa	xmm11, [rsp+16*7]
	movdqa	xmm12, [rsp+16*8]
	movdqa	xmm13, [rsp+16*9]
%endif
	add	rsp, VARIABLE_OFFSET
	ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

align 16
_less_than_256:

	; check if there is enough buffer to be able to fold 16B at a time
	cmp	arg3, 32
	jl	_less_than_32
	movdqa	xmm11, [SHUF_MASK]

	; if there is, load the constants
	movdqa	xmm10, [rk1]	; rk1 and rk2 in xmm10

	movd	xmm0, arg1_low32	; get the initial crc value
	pslldq	xmm0, 12	; align it to its correct place
	movdqu	xmm7, [arg2]	; load the plaintext
	pshufb	xmm7, xmm11	; byte-reflect the plaintext
	pxor	xmm7, xmm0


	; update the buffer pointer
	add	arg2, 16

	; update the counter. subtract 32 instead of 16 to save one instruction from the loop
	sub	arg3, 32

	jmp	_16B_reduction_loop


align 16
_less_than_32:
	; move the initial crc to the return value; this is necessary for zero-length buffers
	mov	eax, arg1_low32
	test	arg3, arg3
	je	_cleanup

	movdqa	xmm11, [SHUF_MASK]

	movd	xmm0, arg1_low32	; get the initial crc value
	pslldq	xmm0, 12	; align it to its correct place

	cmp	arg3, 16
	je	_exact_16_left
	jl	_less_than_16_left

	movdqu	xmm7, [arg2]	; load the plaintext
	pshufb	xmm7, xmm11	; byte-reflect the plaintext
	pxor	xmm7, xmm0	; xor the initial crc value
	add	arg2, 16
	sub	arg3, 16
	movdqa	xmm10, [rk1]	; rk1 and rk2 in xmm10
	jmp	_get_last_two_xmms


align 16
_less_than_16_left:
	; use stack space to stage data of fewer than 16 bytes; zero out the 16B of memory first
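	; (A sketch of the approach: the bytes are copied onto the zeroed slab at
	; rsp in 8/4/2/1-byte chunks, so one aligned 16-byte load below can pick
	; them up, with the unused tail already zero.)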

	pxor	xmm1, xmm1
	mov	r11, rsp
	movdqa	[r11], xmm1

	cmp	arg3, 4
	jl	_only_less_than_4

	; back up the counter value
	mov	r9, arg3
	cmp	arg3, 8
	jl	_less_than_8_left

	; load 8 bytes
	mov	rax, [arg2]
	mov	[r11], rax
	add	r11, 8
	sub	arg3, 8
	add	arg2, 8
_less_than_8_left:

	cmp	arg3, 4
	jl	_less_than_4_left

	; load 4 bytes
	mov	eax, [arg2]
	mov	[r11], eax
	add	r11, 4
	sub	arg3, 4
	add	arg2, 4
_less_than_4_left:

	cmp	arg3, 2
	jl	_less_than_2_left

	; load 2 bytes
	mov	ax, [arg2]
	mov	[r11], ax
	add	r11, 2
	sub	arg3, 2
	add	arg2, 2
_less_than_2_left:
	cmp	arg3, 1
	jl	_zero_left

	; load 1 byte
	mov	al, [arg2]
	mov	[r11], al
_zero_left:
	movdqa	xmm7, [rsp]
	pshufb	xmm7, xmm11
	pxor	xmm7, xmm0	; xor the initial crc value

	lea	rax, [pshufb_shf_table + 16]
	sub	rax, r9
	movdqu	xmm0, [rax]
	pxor	xmm0, [mask1]

	pshufb	xmm7, xmm0
	jmp	_128_done

align 16
_exact_16_left:
	movdqu	xmm7, [arg2]
	pshufb	xmm7, xmm11
	pxor	xmm7, xmm0	; xor the initial crc value

	jmp	_128_done

_only_less_than_4:
	cmp	arg3, 3
	jl	_only_less_than_3

	; load 3 bytes
	mov	al, [arg2]
	mov	[r11], al

	mov	al, [arg2+1]
	mov	[r11+1], al

	mov	al, [arg2+2]
	mov	[r11+2], al

	movdqa	xmm7, [rsp]
	pshufb	xmm7, xmm11
	pxor	xmm7, xmm0	; xor the initial crc value

	psrldq	xmm7, 5

	jmp	_barrett
_only_less_than_3:
	cmp	arg3, 2
	jl	_only_less_than_2

	; load 2 bytes
	mov	al, [arg2]
	mov	[r11], al

	mov	al, [arg2+1]
	mov	[r11+1], al

	movdqa	xmm7, [rsp]
	pshufb	xmm7, xmm11
	pxor	xmm7, xmm0	; xor the initial crc value

	psrldq	xmm7, 6

	jmp	_barrett
_only_less_than_2:

	; load 1 byte
	mov	al, [arg2]
	mov	[r11], al

	movdqa	xmm7, [rsp]
	pshufb	xmm7, xmm11
	pxor	xmm7, xmm0	; xor the initial crc value

	psrldq	xmm7, 7

	jmp	_barrett

section .data

; precomputed constants
; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits)
align 16
; Q = 0x18BB70000
; rk1 = 2^(32*3) mod Q << 32
; rk2 = 2^(32*5) mod Q << 32
; rk3 = 2^(32*15) mod Q << 32
; rk4 = 2^(32*17) mod Q << 32
; rk5 = 2^(32*3) mod Q << 32
; rk6 = 2^(32*2) mod Q << 32
; rk7 = floor(2^64/Q)
; rk8 = Q
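; rk9..rk20 (an assumption, inferred from their use in the 8-to-1 register
; reduction above): per-register fold-constant pairs; rk9/rk10 fold xmm0
; across 112 bytes onto xmm7, rk11/rk12 fold xmm1 across 96 bytes, and so on
; down to rk19/rk20 folding xmm5 across 32 bytes (xmm6 reuses rk1/rk2 for the
; final 16 bytes).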
rk1:
DQ 0x2d56000000000000
rk2:
DQ 0x06df000000000000
rk3:
DQ 0x9d9d000000000000
rk4:
DQ 0x7cf5000000000000
rk5:
DQ 0x2d56000000000000
rk6:
DQ 0x1368000000000000
rk7:
DQ 0x00000001f65a57f8
rk8:
DQ 0x000000018bb70000

rk9:
DQ 0xceae000000000000
rk10:
DQ 0xbfd6000000000000
rk11:
DQ 0x1e16000000000000
rk12:
DQ 0x713c000000000000
rk13:
DQ 0xf7f9000000000000
rk14:
DQ 0x80a6000000000000
rk15:
DQ 0x044c000000000000
rk16:
DQ 0xe658000000000000
rk17:
DQ 0xad18000000000000
rk18:
DQ 0xa497000000000000
rk19:
DQ 0x6ee3000000000000
rk20:
DQ 0xe7b5000000000000

mask1:
dq 0x8080808080808080, 0x8080808080808080
mask2:
dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF

SHUF_MASK:
dq 0x08090A0B0C0D0E0F, 0x0001020304050607

pshufb_shf_table:
; use these values for shift constants for the pshufb instruction
; different alignments result in values as shown:
;	dq 0x8887868584838281, 0x008f8e8d8c8b8a89	; shl 15 (16-1) / shr1
;	dq 0x8988878685848382, 0x01008f8e8d8c8b8a	; shl 14 (16-2) / shr2
;	dq 0x8a89888786858483, 0x0201008f8e8d8c8b	; shl 13 (16-3) / shr3
;	dq 0x8b8a898887868584, 0x030201008f8e8d8c	; shl 12 (16-4) / shr4
;	dq 0x8c8b8a8988878685, 0x04030201008f8e8d	; shl 11 (16-5) / shr5
;	dq 0x8d8c8b8a89888786, 0x0504030201008f8e	; shl 10 (16-6) / shr6
;	dq 0x8e8d8c8b8a898887, 0x060504030201008f	; shl 9  (16-7) / shr7
;	dq 0x8f8e8d8c8b8a8988, 0x0706050403020100	; shl 8  (16-8) / shr8
;	dq 0x008f8e8d8c8b8a89, 0x0807060504030201	; shl 7  (16-9) / shr9
;	dq 0x01008f8e8d8c8b8a, 0x0908070605040302	; shl 6  (16-10) / shr10
;	dq 0x0201008f8e8d8c8b, 0x0a09080706050403	; shl 5  (16-11) / shr11
;	dq 0x030201008f8e8d8c, 0x0b0a090807060504	; shl 4  (16-12) / shr12
;	dq 0x04030201008f8e8d, 0x0c0b0a0908070605	; shl 3  (16-13) / shr13
;	dq 0x0504030201008f8e, 0x0d0c0b0a09080706	; shl 2  (16-14) / shr14
;	dq 0x060504030201008f, 0x0e0d0c0b0a090807	; shl 1  (16-15) / shr15
dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
dq 0x0706050403020100, 0x000e0d0c0b0a0908

;;;       func             core, ver, snum
slversion crc16_t10dif_01, 01,   06,  0010