;; Provenance: ceph.git — ceph/src/isa-l/crc/crc64_jones_norm_by8.asm
;; (imported with "update sources to v12.1.1"; upstream project: Intel ISA-L)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 ; Function API:
31 ; uint64_t crc64_jones_norm_by8(
32 ; uint64_t init_crc, //initial CRC value, 64 bits
33 ; const unsigned char *buf, //buffer pointer to calculate CRC on
34 ; uint64_t len //buffer length in bytes (64-bit data)
35 ; );
36 ;
37 %include "reg_sizes.asm"
38
39 %define fetch_dist 1024
40
41 [bits 64]
42 default rel
43
44 section .text
45
46 %ifidn __OUTPUT_FORMAT__, win64
47 %xdefine arg1 rcx
48 %xdefine arg2 rdx
49 %xdefine arg3 r8
50 %else
51 %xdefine arg1 rdi
52 %xdefine arg2 rsi
53 %xdefine arg3 rdx
54 %endif
55
56 %define TMP 16*0
57 %ifidn __OUTPUT_FORMAT__, win64
58 %define XMM_SAVE 16*2
59 %define VARIABLE_OFFSET 16*10+8
60 %else
61 %define VARIABLE_OFFSET 16*2+8
62 %endif
63 align 16
64 global crc64_jones_norm_by8:function
65 crc64_jones_norm_by8:
66
67 not arg1 ;~init_crc
68
69 sub rsp,VARIABLE_OFFSET
70
71 %ifidn __OUTPUT_FORMAT__, win64
72 ; push the xmm registers into the stack to maintain
73 movdqa [rsp + XMM_SAVE + 16*0], xmm6
74 movdqa [rsp + XMM_SAVE + 16*1], xmm7
75 movdqa [rsp + XMM_SAVE + 16*2], xmm8
76 movdqa [rsp + XMM_SAVE + 16*3], xmm9
77 movdqa [rsp + XMM_SAVE + 16*4], xmm10
78 movdqa [rsp + XMM_SAVE + 16*5], xmm11
79 movdqa [rsp + XMM_SAVE + 16*6], xmm12
80 movdqa [rsp + XMM_SAVE + 16*7], xmm13
81 %endif
82
83
84 ; check if smaller than 256
85 cmp arg3, 256
86
87 ; for sizes less than 256, we can't fold 128B at a time...
88 jl _less_than_256
89
90
91 ; load the initial crc value
92 movq xmm10, arg1 ; initial crc
93
94 ; crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register.
95 ; because data will be byte-reflected and will align with initial crc at correct place.
96 pslldq xmm10, 8
97
98 movdqa xmm11, [SHUF_MASK]
99 ; receive the initial 128B data, xor the initial crc value
100 movdqu xmm0, [arg2+16*0]
101 movdqu xmm1, [arg2+16*1]
102 movdqu xmm2, [arg2+16*2]
103 movdqu xmm3, [arg2+16*3]
104 movdqu xmm4, [arg2+16*4]
105 movdqu xmm5, [arg2+16*5]
106 movdqu xmm6, [arg2+16*6]
107 movdqu xmm7, [arg2+16*7]
108
109 pshufb xmm0, xmm11
110 ; XOR the initial_crc value
111 pxor xmm0, xmm10
112 pshufb xmm1, xmm11
113 pshufb xmm2, xmm11
114 pshufb xmm3, xmm11
115 pshufb xmm4, xmm11
116 pshufb xmm5, xmm11
117 pshufb xmm6, xmm11
118 pshufb xmm7, xmm11
119
120 movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
121 ;imm value of pclmulqdq instruction will determine which constant to use
122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
123 ; we subtract 256 instead of 128 to save one instruction from the loop
124 sub arg3, 256
125
126 ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop
127 ; loop will fold 128B at a time until we have 128+y Bytes of buffer
128
129
130 ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
131 _fold_128_B_loop:
132
133 ; update the buffer pointer
134 add arg2, 128 ; buf += 128;
135
136 prefetchnta [arg2+fetch_dist+0]
137 movdqu xmm9, [arg2+16*0]
138 movdqu xmm12, [arg2+16*1]
139 pshufb xmm9, xmm11
140 pshufb xmm12, xmm11
141 movdqa xmm8, xmm0
142 movdqa xmm13, xmm1
143 pclmulqdq xmm0, xmm10, 0x0
144 pclmulqdq xmm8, xmm10 , 0x11
145 pclmulqdq xmm1, xmm10, 0x0
146 pclmulqdq xmm13, xmm10 , 0x11
147 pxor xmm0, xmm9
148 xorps xmm0, xmm8
149 pxor xmm1, xmm12
150 xorps xmm1, xmm13
151
152 prefetchnta [arg2+fetch_dist+32]
153 movdqu xmm9, [arg2+16*2]
154 movdqu xmm12, [arg2+16*3]
155 pshufb xmm9, xmm11
156 pshufb xmm12, xmm11
157 movdqa xmm8, xmm2
158 movdqa xmm13, xmm3
159 pclmulqdq xmm2, xmm10, 0x0
160 pclmulqdq xmm8, xmm10 , 0x11
161 pclmulqdq xmm3, xmm10, 0x0
162 pclmulqdq xmm13, xmm10 , 0x11
163 pxor xmm2, xmm9
164 xorps xmm2, xmm8
165 pxor xmm3, xmm12
166 xorps xmm3, xmm13
167
168 prefetchnta [arg2+fetch_dist+64]
169 movdqu xmm9, [arg2+16*4]
170 movdqu xmm12, [arg2+16*5]
171 pshufb xmm9, xmm11
172 pshufb xmm12, xmm11
173 movdqa xmm8, xmm4
174 movdqa xmm13, xmm5
175 pclmulqdq xmm4, xmm10, 0x0
176 pclmulqdq xmm8, xmm10 , 0x11
177 pclmulqdq xmm5, xmm10, 0x0
178 pclmulqdq xmm13, xmm10 , 0x11
179 pxor xmm4, xmm9
180 xorps xmm4, xmm8
181 pxor xmm5, xmm12
182 xorps xmm5, xmm13
183
184 prefetchnta [arg2+fetch_dist+96]
185 movdqu xmm9, [arg2+16*6]
186 movdqu xmm12, [arg2+16*7]
187 pshufb xmm9, xmm11
188 pshufb xmm12, xmm11
189 movdqa xmm8, xmm6
190 movdqa xmm13, xmm7
191 pclmulqdq xmm6, xmm10, 0x0
192 pclmulqdq xmm8, xmm10 , 0x11
193 pclmulqdq xmm7, xmm10, 0x0
194 pclmulqdq xmm13, xmm10 , 0x11
195 pxor xmm6, xmm9
196 xorps xmm6, xmm8
197 pxor xmm7, xmm12
198 xorps xmm7, xmm13
199
200 sub arg3, 128
201
202 ; check if there is another 128B in the buffer to be able to fold
203 jge _fold_128_B_loop
204 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
205
206 add arg2, 128
207 ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
208 ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
209
210
211 ; fold the 8 xmm registers to 1 xmm register with different constants
212
213 movdqa xmm10, [rk9]
214 movdqa xmm8, xmm0
215 pclmulqdq xmm0, xmm10, 0x11
216 pclmulqdq xmm8, xmm10, 0x0
217 pxor xmm7, xmm8
218 xorps xmm7, xmm0
219
220 movdqa xmm10, [rk11]
221 movdqa xmm8, xmm1
222 pclmulqdq xmm1, xmm10, 0x11
223 pclmulqdq xmm8, xmm10, 0x0
224 pxor xmm7, xmm8
225 xorps xmm7, xmm1
226
227 movdqa xmm10, [rk13]
228 movdqa xmm8, xmm2
229 pclmulqdq xmm2, xmm10, 0x11
230 pclmulqdq xmm8, xmm10, 0x0
231 pxor xmm7, xmm8
232 pxor xmm7, xmm2
233
234 movdqa xmm10, [rk15]
235 movdqa xmm8, xmm3
236 pclmulqdq xmm3, xmm10, 0x11
237 pclmulqdq xmm8, xmm10, 0x0
238 pxor xmm7, xmm8
239 xorps xmm7, xmm3
240
241 movdqa xmm10, [rk17]
242 movdqa xmm8, xmm4
243 pclmulqdq xmm4, xmm10, 0x11
244 pclmulqdq xmm8, xmm10, 0x0
245 pxor xmm7, xmm8
246 pxor xmm7, xmm4
247
248 movdqa xmm10, [rk19]
249 movdqa xmm8, xmm5
250 pclmulqdq xmm5, xmm10, 0x11
251 pclmulqdq xmm8, xmm10, 0x0
252 pxor xmm7, xmm8
253 xorps xmm7, xmm5
254
255 movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2
256
257 movdqa xmm8, xmm6
258 pclmulqdq xmm6, xmm10, 0x11
259 pclmulqdq xmm8, xmm10, 0x0
260 pxor xmm7, xmm8
261 pxor xmm7, xmm6
262
263
264 ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
265 ; instead of a cmp instruction, we use the negative flag with the jl instruction
266 add arg3, 128-16
267 jl _final_reduction_for_128
268
269 ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
270 ; we can fold 16 bytes at a time if y>=16
271 ; continue folding 16B at a time
272
273 _16B_reduction_loop:
274 movdqa xmm8, xmm7
275 pclmulqdq xmm7, xmm10, 0x11
276 pclmulqdq xmm8, xmm10, 0x0
277 pxor xmm7, xmm8
278 movdqu xmm0, [arg2]
279 pshufb xmm0, xmm11
280 pxor xmm7, xmm0
281 add arg2, 16
282 sub arg3, 16
283 ; instead of a cmp instruction, we utilize the flags with the jge instruction
284 ; equivalent of: cmp arg3, 16-16
285 ; check if there is any more 16B in the buffer to be able to fold
286 jge _16B_reduction_loop
287
288 ;now we have 16+z bytes left to reduce, where 0<= z < 16.
289 ;first, we reduce the data in the xmm7 register
290
291
292 _final_reduction_for_128:
293 ; check if any more data to fold. If not, compute the CRC of the final 128 bits
294 add arg3, 16
295 je _128_done
296
297 ; here we are getting data that is less than 16 bytes.
298 ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
299 ; after that the registers need to be adjusted.
300 _get_last_two_xmms:
301 movdqa xmm2, xmm7
302
303 movdqu xmm1, [arg2 - 16 + arg3]
304 pshufb xmm1, xmm11
305
306 ; get rid of the extra data that was loaded before
307 ; load the shift constant
308 lea rax, [pshufb_shf_table + 16]
309 sub rax, arg3
310 movdqu xmm0, [rax]
311
312 ; shift xmm2 to the left by arg3 bytes
313 pshufb xmm2, xmm0
314
315 ; shift xmm7 to the right by 16-arg3 bytes
316 pxor xmm0, [mask1]
317 pshufb xmm7, xmm0
318 pblendvb xmm1, xmm2 ;xmm0 is implicit
319
320 ; fold 16 Bytes
321 movdqa xmm2, xmm1
322 movdqa xmm8, xmm7
323 pclmulqdq xmm7, xmm10, 0x11
324 pclmulqdq xmm8, xmm10, 0x0
325 pxor xmm7, xmm8
326 pxor xmm7, xmm2
327
328 _128_done:
329 ; compute crc of a 128-bit value
330 movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10
331 movdqa xmm0, xmm7
332
333 ;64b fold
334 pclmulqdq xmm7, xmm10, 0x01 ; H*L
335 pslldq xmm0, 8
336 pxor xmm7, xmm0
337
338 ;barrett reduction
339 _barrett:
340 movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
341 movdqa xmm0, xmm7
342
343 movdqa xmm1, xmm7
344 pand xmm1, [mask3]
345 pclmulqdq xmm7, xmm10, 0x01
346 pxor xmm7, xmm1
347
348 pclmulqdq xmm7, xmm10, 0x11
349 pxor xmm7, xmm0
350 pextrq rax, xmm7, 0
351
352 _cleanup:
353 not rax
354 %ifidn __OUTPUT_FORMAT__, win64
355 movdqa xmm6, [rsp + XMM_SAVE + 16*0]
356 movdqa xmm7, [rsp + XMM_SAVE + 16*1]
357 movdqa xmm8, [rsp + XMM_SAVE + 16*2]
358 movdqa xmm9, [rsp + XMM_SAVE + 16*3]
359 movdqa xmm10, [rsp + XMM_SAVE + 16*4]
360 movdqa xmm11, [rsp + XMM_SAVE + 16*5]
361 movdqa xmm12, [rsp + XMM_SAVE + 16*6]
362 movdqa xmm13, [rsp + XMM_SAVE + 16*7]
363 %endif
364 add rsp, VARIABLE_OFFSET
365 ret
366
367 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
368 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
370 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
371
372 align 16
373 _less_than_256:
374
375 ; check if there is enough buffer to be able to fold 16B at a time
376 cmp arg3, 32
377 jl _less_than_32
378 movdqa xmm11, [SHUF_MASK]
379
380 ; if there is, load the constants
381 movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
382
383 movq xmm0, arg1 ; get the initial crc value
384 pslldq xmm0, 8 ; align it to its correct place
385 movdqu xmm7, [arg2] ; load the plaintext
386 pshufb xmm7, xmm11 ; byte-reflect the plaintext
387 pxor xmm7, xmm0
388
389
390 ; update the buffer pointer
391 add arg2, 16
392
393 ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
394 sub arg3, 32
395
396 jmp _16B_reduction_loop
397 align 16
398 _less_than_32:
399 ; mov initial crc to the return value. this is necessary for zero-length buffers.
400 mov rax, arg1
401 test arg3, arg3
402 je _cleanup
403
404 movdqa xmm11, [SHUF_MASK]
405
406 movq xmm0, arg1 ; get the initial crc value
407 pslldq xmm0, 8 ; align it to its correct place
408
409 cmp arg3, 16
410 je _exact_16_left
411 jl _less_than_16_left
412
413 movdqu xmm7, [arg2] ; load the plaintext
414 pshufb xmm7, xmm11 ; byte-reflect the plaintext
415 pxor xmm7, xmm0 ; xor the initial crc value
416 add arg2, 16
417 sub arg3, 16
418 movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
419 jmp _get_last_two_xmms
420 align 16
421 _less_than_16_left:
422 ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
423 pxor xmm1, xmm1
424 mov r11, rsp
425 movdqa [r11], xmm1
426
427 ; backup the counter value
428 mov r9, arg3
429 cmp arg3, 8
430 jl _less_than_8_left
431
432 ; load 8 Bytes
433 mov rax, [arg2]
434 mov [r11], rax
435 add r11, 8
436 sub arg3, 8
437 add arg2, 8
438 _less_than_8_left:
439
440 cmp arg3, 4
441 jl _less_than_4_left
442
443 ; load 4 Bytes
444 mov eax, [arg2]
445 mov [r11], eax
446 add r11, 4
447 sub arg3, 4
448 add arg2, 4
449 _less_than_4_left:
450
451 cmp arg3, 2
452 jl _less_than_2_left
453
454 ; load 2 Bytes
455 mov ax, [arg2]
456 mov [r11], ax
457 add r11, 2
458 sub arg3, 2
459 add arg2, 2
460 _less_than_2_left:
461 cmp arg3, 1
462 jl _zero_left
463
464 ; load 1 Byte
465 mov al, [arg2]
466 mov [r11], al
467 _zero_left:
468 movdqa xmm7, [rsp]
469 pshufb xmm7, xmm11
470 pxor xmm7, xmm0 ; xor the initial crc value
471
472 ; shl r9, 4
473 lea rax, [pshufb_shf_table + 16]
474 sub rax, r9
475
476 cmp r9, 8
477 jl _end_1to7
478
479 _end_8to15:
480 movdqu xmm0, [rax]
481 pxor xmm0, [mask1]
482
483 pshufb xmm7, xmm0
484 jmp _128_done
485
486 _end_1to7:
487 ; Right shift (8-length) bytes in XMM
488 add rax, 8
489 movdqu xmm0, [rax]
490 pshufb xmm7,xmm0
491
492 jmp _barrett
493 align 16
494 _exact_16_left:
495 movdqu xmm7, [arg2]
496 pshufb xmm7, xmm11
497 pxor xmm7, xmm0 ; xor the initial crc value
498
499 jmp _128_done
500
501 section .data
502
503 ; precomputed constants
504 align 16
505
506 rk1:
507 DQ 0x4445ed2750017038
508 rk2:
509 DQ 0x698b74157cfbd736
510 rk3:
511 DQ 0x0cfcfb5101c4b775
512 rk4:
513 DQ 0x65403fd47cbec866
514 rk5:
515 DQ 0x4445ed2750017038
516 rk6:
517 DQ 0x0000000000000000
518 rk7:
519 DQ 0xddf3eeb298be6cf8
520 rk8:
521 DQ 0xad93d23594c935a9
522 rk9:
523 DQ 0xd8dc208e2ba527b4
524 rk10:
525 DQ 0xf032cfec76bb2bc5
526 rk11:
527 DQ 0xb536044f357f4238
528 rk12:
529 DQ 0xfdbf104d938ba67a
530 rk13:
531 DQ 0xeeddad9297a843e7
532 rk14:
533 DQ 0x3550bce629466473
534 rk15:
535 DQ 0x4e501e58ca43d25e
536 rk16:
537 DQ 0x13c961588f27f643
538 rk17:
539 DQ 0x3b60d00dcb1099bc
540 rk18:
541 DQ 0x44bf1f468c53b9a3
542 rk19:
543 DQ 0x96f2236e317179ee
544 rk20:
545 DQ 0xf00839aa0dd64bac
546
547 mask1:
548 dq 0x8080808080808080, 0x8080808080808080
549 mask2:
550 dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
551 mask3:
552 dq 0x0000000000000000, 0xFFFFFFFFFFFFFFFF
553
554 SHUF_MASK:
555 dq 0x08090A0B0C0D0E0F, 0x0001020304050607
556
557 pshufb_shf_table:
558 ; use these values for shift constants for the pshufb instruction
559 ; different alignments result in values as shown:
560 ; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
561 ; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-3) / shr2
562 ; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-4) / shr3
563 ; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
564 ; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
565 ; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
566 ; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
567 ; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
568 ; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
569 ; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
570 ; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
571 ; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
572 ; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
573 ; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
574 ; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
575 dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
576 dq 0x0706050403020100, 0x0f0e0d0c0b0a0908
577 dq 0x8080808080808080, 0x0f0e0d0c0b0a0908
578 dq 0x8080808080808080, 0x8080808080808080
579
580 ;;; func core, ver, snum
581 slversion crc64_jones_norm_by8, 01, 00, 0026