]> git.proxmox.com Git - ceph.git/blame - ceph/src/isa-l/igzip/igzip_body.asm
update sources to v12.1.1
[ceph.git] / ceph / src / isa-l / igzip / igzip_body.asm
CommitLineData
7c673cae
FG
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3;
4; Redistribution and use in source and binary forms, with or without
5; modification, are permitted provided that the following conditions
6; are met:
7; * Redistributions of source code must retain the above copyright
8; notice, this list of conditions and the following disclaimer.
9; * Redistributions in binary form must reproduce the above copyright
10; notice, this list of conditions and the following disclaimer in
11; the documentation and/or other materials provided with the
12; distribution.
13; * Neither the name of Intel Corporation nor the names of its
14; contributors may be used to endorse or promote products derived
15; from this software without specific prior written permission.
16;
17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%include "options.asm"
7c673cae
FG
31
32%include "lz0a_const.asm"
33%include "data_struct2.asm"
34%include "bitbuf2.asm"
35%include "huffman.asm"
36%include "igzip_compare_types.asm"
7c673cae
FG
37%include "reg_sizes.asm"
38
39%include "stdmac.asm"
40
7c673cae
FG
; MARK <label>: debug-only position marker.
;   With DEBUG defined it declares <label> global and defines it at the point
;   of use; without DEBUG it expands to nothing.
41%ifdef DEBUG
42%macro MARK 1
43global %1
44%1:
45%endm
46%else
47%macro MARK 1
48%endm
49%endif
50
51;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
52;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
53;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7c673cae 54
224ce89b
WB
; Symbolic register names used by the routine below.
; Several names map onto the same physical register (for example tmp2 and
; hash2 are both rcx; f_end_i, dist2, tmp7 and code4 are all r12), so writing
; one name overwrites every other name that shares its register.
55%define tmp2 rcx
56%define hash2 rcx
7c673cae 57
224ce89b
WB
58%define curr_data rax
59%define code rax
60%define tmp5 rax
7c673cae 61
224ce89b
WB
62%define tmp4 rbx
63%define dist rbx
64%define code2 rbx
7c673cae 65
224ce89b
WB
66%define hash rdx
67%define len rdx
68%define code_len3 rdx
69%define tmp8 rdx
7c673cae 70
224ce89b
WB
71%define tmp1 rsi
72%define code_len2 rsi
7c673cae 73
224ce89b 74%define file_start rdi
7c673cae 75
224ce89b 76%define m_bit_count rbp
7c673cae 77
224ce89b
WB
78%define curr_data2 r8
79%define len2 r8
80%define tmp6 r8
7c673cae 81
224ce89b 82%define m_bits r9
7c673cae 83
224ce89b 84%define f_i r10
7c673cae 85
224ce89b 86%define m_out_buf r11
7c673cae 87
224ce89b
WB
88%define f_end_i r12
89%define dist2 r12
90%define tmp7 r12
91%define code4 r12
7c673cae 92
224ce89b
WB
93%define tmp3 r13
94%define code3 r13
7c673cae 95
224ce89b 96%define stream r14
7c673cae 97
224ce89b 98%define hufftables r15
7c673cae 99
224ce89b
WB
100;; GPR r8 & r15 can be used -- NOTE(review): looks stale; r8 (curr_data2/len2/tmp6) and r15 (hufftables) are both assigned above, confirm
101
; Vector register names; xtmp0/ytmp0 and xtmp1/ytmp1 name the same registers
; at 128-bit and 256-bit width.
102%define xtmp0 xmm0 ; tmp
103%define xtmp1 xmm1 ; tmp
104%define xhash xmm2
105%define xmask xmm3
106%define xdata xmm4
107
108%define ytmp0 ymm0 ; tmp
109%define ytmp1 ymm1 ; tmp
7c673cae 110
7c673cae 111
7c673cae
FG
112;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
113;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
114;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
115
7c673cae
FG
116
; Stack frame layout, as offsets from rsp after the prologue.
; NOTE(review): blen_mem_offset and xmm_save_mem_offset are not referenced by
; the code visible in this file (only GPRs are saved); stack_size still
; reserves room for them.
117blen_mem_offset equ 0 ; local variable (8 bytes)
224ce89b
WB
118f_end_i_mem_offset equ 8 ; spill slot for f_end_i (8 bytes)
119gpr_save_mem_offset equ 16 ; gpr save area (8*8 bytes)
120xmm_save_mem_offset equ 16 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
121stack_size equ 2*8 + 8*8 + 4*16 + 8
7c673cae
FG
122;;; The trailing 8 is there because the stack address is an odd multiple of 8
123;;; after a function call and we want it aligned to 16 bytes
124
125; void isal_deflate_body ( isal_zstream *stream )
126; arg 1: rcx: addr of stream (for elf64 output the argument arrives in rdi and is copied to rcx below)
; The exported symbol is isal_deflate_body_ with ARCH appended.
127global isal_deflate_body_ %+ ARCH
128isal_deflate_body_ %+ ARCH %+ :
129%ifidn __OUTPUT_FORMAT__, elf64
130 mov rcx, rdi
131%endif
132
133 ;; do nothing if (avail_in == 0)
 ;; (nothing is pushed or saved on this path; only rax and rdx are written)
134 cmp dword [rcx + _avail_in], 0
135 jne skip1
136
137 ;; Set stream's next state
 ;; state = ZSTATE_FLUSH_READ_BUFFER if end_of_stream != 0 or flush != _NO_FLUSH,
 ;; otherwise ZSTATE_BODY
138 mov rdx, ZSTATE_FLUSH_READ_BUFFER
139 mov rax, ZSTATE_BODY
140 cmp dword [rcx + _end_of_stream], 0
141 cmovne rax, rdx
142 cmp dword [rcx + _flush], _NO_FLUSH
143 cmovne rax, rdx
144 mov dword [rcx + _internal_state_state], eax
145 ret
;; skip1: reached when avail_in != 0. Builds the stack frame, saves eight GPRs
;; and loads the working state from the stream structure into registers.
146skip1:
147
 ;; With ALIGN_STACK the frame is forced to 16-byte alignment and rbp keeps
 ;; the pre-adjustment rsp; otherwise rsp is simply lowered by stack_size.
148%ifdef ALIGN_STACK
149 push rbp
150 mov rbp, rsp
151 sub rsp, stack_size
152 and rsp, ~15
153%else
154 sub rsp, stack_size
155%endif
156
 ;; Save the GPRs that are restored again at output_end.
157 mov [rsp + gpr_save_mem_offset + 0*8], rbx
158 mov [rsp + gpr_save_mem_offset + 1*8], rsi
159 mov [rsp + gpr_save_mem_offset + 2*8], rdi
160 mov [rsp + gpr_save_mem_offset + 3*8], rbp
161 mov [rsp + gpr_save_mem_offset + 4*8], r12
162 mov [rsp + gpr_save_mem_offset + 5*8], r13
163 mov [rsp + gpr_save_mem_offset + 6*8], r14
164 mov [rsp + gpr_save_mem_offset + 7*8], r15
7c673cae
FG
165
166 mov stream, rcx
7c673cae
FG
167 mov dword [stream + _internal_state_has_eob], 0
168
224ce89b
WB
 ;; xmask = the 16-byte constant at label mask
169 MOVDQU xmask, [mask]
170
7c673cae
FG
171 ; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
 ;; m_out_start = next_out; m_out_end = next_out + avail_out - SLOP
172 mov m_out_buf, [stream + _next_out]
173 mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
174 mov tmp1 %+ d, [stream + _avail_out]
175 add tmp1, m_out_buf
176 sub tmp1, SLOP
224ce89b 177
7c673cae
FG
178 mov [stream + _internal_state_bitbuf_m_out_end], tmp1
179
180 mov m_bits, [stream + _internal_state_bitbuf_m_bits]
181 mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
7c673cae 182 mov hufftables, [stream + _hufftables]
224ce89b
WB
183
 ;; file_start = next_in - total_in, so [file_start + f_i] addresses the
 ;; input byte at stream position f_i (f_i starts at total_in).
184 mov file_start, [stream + _next_in]
185
186 mov f_i %+ d, dword [stream + _total_in]
187 sub file_start, f_i
188
 ;; f_end_i = total_in + avail_in - LA; the value is also spilled to the
 ;; stack because r12 is reused under other names inside the loop.
189 mov f_end_i %+ d, [stream + _avail_in]
190 add f_end_i, f_i
191
192 ; f_end_i -= LA;
7c673cae 193 sub f_end_i, LA
7c673cae 194 mov [rsp + f_end_i_mem_offset], f_end_i
224ce89b
WB
195 ; if (f_end_i <= f_i) goto input_end;   (signed compare)
196
197 cmp f_end_i, f_i
198 jle input_end
7c673cae
FG
199
200 ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
224ce89b
WB
;; Prime the loop: load the input at f_i into xdata (16 bytes) and curr_data
;; (8 bytes), then compute hash from that data and hash2 from the same data
;; shifted right by one byte. compute_hash is a macro defined outside this file.
201MARK __body_compute_hash_ %+ ARCH
202 MOVDQU xdata, [file_start + f_i]
203 mov curr_data, [file_start + f_i]
204 mov tmp3, curr_data
205 mov tmp6, curr_data
7c673cae 206
224ce89b 207 compute_hash hash, curr_data
7c673cae 208
224ce89b
WB
209 shr tmp3, 8
210 compute_hash hash2, tmp3
7c673cae 211
224ce89b
WB
212 and hash, HASH_MASK
213 and hash2, HASH_MASK
7c673cae 214
224ce89b
WB
 ;; If has_hist is 0, take the write_first_byte path instead of entering
 ;; the main loop directly.
215 cmp dword [stream + _internal_state_has_hist], 0
216 je write_first_byte
7c673cae 217
224ce89b 218 jmp loop2
7c673cae
FG
219 align 16
220
;; loop2: head of the main loop.
;; On entry: hash/hash2 hold the masked hashes for positions f_i and f_i+1,
;; curr_data and xdata hold the input at f_i, and r12 holds f_end_i.
;; r12 is then reused as dist2, so f_end_i is lost until reloaded from the stack.
221loop2:
7c673cae
FG
222 ; if (state->bitbuf.is_full()) {
223 cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
224ce89b 224 ja output_end
7c673cae
FG
225
226 xor dist, dist
227 xor dist2, dist2
228 xor tmp3, tmp3
229
 ;; tmp1 = address of the current input byte
230 lea tmp1, [file_start + f_i]
7c673cae
FG
231
 ;; low 16 bits of dist = f_i - 1 - head[hash]; then head[hash] = f_i
232 mov dist %+ w, f_i %+ w
224ce89b 233 dec dist
7c673cae 234 sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
7c673cae
FG
235 mov [stream + _internal_state_head + 2 * hash], f_i %+ w
236
237 inc f_i
238
224ce89b
WB
 ;; tmp5 is rax (curr_data): shift the loaded data right by two bytes and
 ;; hash it into tmp6; tmp8 keeps a copy for the next hash below.
239 MOVQ tmp6, xdata
240 shr tmp5, 16
241 mov tmp8, tmp5
242 compute_hash tmp6, tmp5
243
 ;; same distance computation for the second position, using hash2 and the
 ;; already incremented f_i
7c673cae 244 mov dist2 %+ w, f_i %+ w
7c673cae 245 dec dist2
224ce89b 246 sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
7c673cae
FG
247 mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
248
7c673cae 249 ; if ((dist-1) < (D-1)) {
224ce89b
WB
 ;; dist = -(dist & (D-1)), so it can be added to tmp1 as a negative offset
250 and dist %+ d, (D-1)
251 neg dist
7c673cae 252
224ce89b
WB
 ;; hash of the data shifted by one more byte goes to tmp2 (rcx)
253 shr tmp8, 8
254 compute_hash tmp2, tmp8
7c673cae 255
224ce89b
WB
256 and dist2 %+ d, (D-1)
257 neg dist2
7c673cae 258
;; Compare the 8 input bytes at each of the two positions against the
;; candidate match addresses, and branch on how many leading bytes agree.
224ce89b 259MARK __body_compare_ %+ ARCH
7c673cae 260 ;; Check for long len/dist match (>7) with first literal
224ce89b
WB
 ;; len = 8 input bytes XOR 8 bytes at the candidate; zero means all 8 match.
 ;; curr_data keeps the un-XORed bytes; xdata is advanced by one byte.
261 MOVQ len, xdata
262 mov curr_data, len
263 PSRLDQ xdata, 1
264 xor len, [tmp1 + dist - 1]
7c673cae
FG
265 jz compare_loop
266
224ce89b
WB
 ;; xhash = { tmp6, tmp2 } in dwords 0 and 1, ANDed with xmask
267 MOVD xhash, tmp6 %+ d
268 PINSRD xhash, tmp2 %+ d, 1
269 PAND xhash, xhash, xmask
7c673cae
FG
270
271 ;; Check for len/dist match (>7) with second literal
224ce89b
WB
272 MOVQ len2, xdata
273 xor len2, [tmp1 + dist2]
7c673cae
FG
274 jz compare_loop2
275
7c673cae
FG
276 ;; Speculatively load the code for the first literal
 ;; (rcx presumably receives the code length; it is used as the shift
 ;; count and length addend below)
277 movzx tmp1, curr_data %+ b
278 get_lit_code tmp1, code3, rcx, hufftables
279
280 ;; Check for len/dist match for first literal
 ;; (low 4 bytes of the XOR are zero => at least 4 bytes match)
224ce89b 281 test len %+ d, 0xFFFFFFFF
7c673cae
FG
282 jz len_dist_huffman_pre
283
284 ;; Speculatively load the code for the second literal
285 shr curr_data, 8
286 and curr_data, 0xff
287 get_lit_code curr_data, code2, code_len2, hufftables
288
 ;; code2 = (code2 << rcx) | code3 ; code_len2 += rcx
224ce89b 289 SHLX code2, code2, rcx
7c673cae
FG
290 or code2, code3
291 add code_len2, rcx
292
293 ;; Check for len/dist match for second literal
 ;; (no match => emit the combined literal pair; otherwise fall through)
224ce89b 294 test len2 %+ d, 0xFFFFFFFF
7c673cae
FG
295 jnz write_lit_bits
296
;; Emit the first literal followed by a length/distance pair for the match
;; found at the second position, update the head table, then continue.
224ce89b 297MARK __body_len_dist_lit_huffman_ %+ ARCH
7c673cae
FG
298len_dist_lit_huffman_pre:
 ;; code_len3 = rcx (set by get_lit_code for the first literal above);
 ;; len2 = index of the lowest set bit of the XOR result, divided by 8
299 mov code_len3, rcx
7c673cae
FG
300 bsf len2, len2
301 shr len2, 3
302
303len_dist_lit_huffman:
224ce89b
WB
 ;; dist2 was stored negated; make it positive again
304 neg dist2
305
7c673cae
FG
306%ifndef LONGER_HUFFTABLE
307 mov tmp4, dist2
308 get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx
309%else
224ce89b 310 get_dist_code dist2, code4, code_len2, hufftables
7c673cae
FG
311%endif
312 get_len_code len2, code, rcx, hufftables ;; rcx is code_len
313
 ;; code4 = (code4 << code_len) | code ; code_len2 += code_len
224ce89b 314 SHLX code4, code4, rcx
7c673cae
FG
315 or code4, code
316 add code_len2, rcx
317
224ce89b
WB
 ;; advance f_i by the match length; len2 is negated for the lea below
318 add f_i, len2
319 neg len2
7c673cae 320
224ce89b
WB
321 MOVQ tmp5, xdata
322 shr tmp5, 24
323 compute_hash tmp4, tmp5
324 and tmp4, HASH_MASK
325
 ;; prepend the literal: code4 = (code4 << code_len3) | code3
326 SHLX code4, code4, code_len3
7c673cae 327 or code4, code3
224ce89b 328 add code_len2, code_len3
7c673cae 329
7c673cae 330 ;; Setup for updating hash
224ce89b 331 lea tmp3, [f_i + len2 + 1] ; tmp3 <= k
7c673cae 332
 ;; reload input data at the new f_i for the next iteration
224ce89b
WB
333 MOVDQU xdata, [file_start + f_i]
334 mov curr_data, [file_start + f_i]
335 mov curr_data2, curr_data
7c673cae 336
224ce89b
WB
 ;; head[xhash dword 0] = k, head[xhash dword 1] = k+1, head[tmp4] = k+2
337 MOVD hash %+ d, xhash
338 PEXTRD hash2 %+ d, xhash, 1
7c673cae
FG
339 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
340
224ce89b
WB
341 compute_hash hash, curr_data
342
7c673cae 343 add tmp3,1
224ce89b 344 mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
7c673cae 345
224ce89b
WB
346 add tmp3, 1
347 mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
348
 ;; code4 shares r12 with f_end_i, so f_end_i is reloaded only after the
 ;; bits have been written
349 write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf, tmp4
350 mov f_end_i, [rsp + f_end_i_mem_offset]
351
352 shr curr_data2, 8
353 compute_hash hash2, curr_data2
354
 ;; Optional: also insert every remaining position k < f_i into the head table.
355%ifdef NO_LIMIT_HASH_UPDATE
356loop3:
357 add tmp3,1
358 cmp tmp3, f_i
359 jae loop3_done
360 mov tmp6, [file_start + tmp3]
361 compute_hash tmp4, tmp6
362 and tmp4 %+ d, HASH_MASK
363 ; state->head[hash] = k;
364 mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
365 jmp loop3
366loop3_done:
367%endif
368 ; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
369 and hash %+ d, HASH_MASK
370 and hash2 %+ d, HASH_MASK
371
372 ; continue
373 cmp f_i, f_end_i
374 jl loop2
375 jmp input_end
7c673cae
FG
376 ;; encode as dist/len
 ;; (match found at the first position: no literal is emitted on this path)
377
224ce89b 378MARK __body_len_dist_huffman_ %+ ARCH
7c673cae
FG
379len_dist_huffman_pre:
 ;; len = index of the lowest set bit of the XOR result, divided by 8
380 bsf len, len
381 shr len, 3
224ce89b 382
7c673cae 383len_dist_huffman:
224ce89b
WB
 ;; undo the inc f_i done in loop2 and make dist positive again
384 dec f_i
385 neg dist
7c673cae
FG
386
387 ; get_dist_code(dist, &code2, &code_len2);
388%ifndef LONGER_HUFFTABLE
389 mov tmp3, dist ; since code2 and dist are rbx
390 get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx
391%else
224ce89b 392 get_dist_code dist, code2, code_len2, hufftables
7c673cae
FG
393%endif
394 ; get_len_code(len, &code, &code_len);
395 get_len_code len, code, rcx, hufftables ;; rcx is code_len
396
397 ; code2 <<= code_len
398 ; code2 |= code
399 ; code_len2 += code_len
224ce89b 400 SHLX code2, code2, rcx
7c673cae
FG
401 or code2, code
402 add code_len2, rcx
403
404 ;; Setup for updating hash
405 lea tmp3, [f_i + 2] ; tmp3 <= k
406 add f_i, len
7c673cae 407
224ce89b
WB
 ;; head[xhash dword 0] = k, head[xhash dword 1] = k+1
408 MOVD hash %+ d, xhash
409 PEXTRD hash2 %+ d, xhash, 1
410 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
411 add tmp3,1
412 mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
413
 ;; reload input data at the new f_i for the next iteration
414 MOVDQU xdata, [file_start + f_i]
415 mov curr_data, [file_start + f_i]
7c673cae
FG
416 mov curr_data2, curr_data
417 compute_hash hash, curr_data
7c673cae 418
224ce89b
WB
 ;; tmp7 is r12, the same register as f_end_i, so f_end_i is reloaded
 ;; right after write_bits
419 write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp7
420 mov f_end_i, [rsp + f_end_i_mem_offset]
7c673cae 421
224ce89b
WB
422 shr curr_data2, 8
423 compute_hash hash2, curr_data2
424
 ;; Optional: also insert every remaining position k < f_i into the head table.
425%ifdef NO_LIMIT_HASH_UPDATE
426loop4:
427 add tmp3,1
7c673cae 428 cmp tmp3, f_i
224ce89b
WB
429 jae loop4_done
430 mov tmp6, [file_start + tmp3]
431 compute_hash tmp4, tmp6
432 and tmp4, HASH_MASK
433 mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
434 jmp loop4
435loop4_done:
7c673cae
FG
436%endif
437
224ce89b
WB
438 ; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
439 and hash %+ d, HASH_MASK
440 and hash2 %+ d, HASH_MASK
7c673cae
FG
441
442 ; continue
443 cmp f_i, f_end_i
444 jl loop2
224ce89b 445 jmp input_end
7c673cae 446
;; write_lit_bits: emit the literal code(s) already assembled in
;; code2/code_len2, advance f_i by one more byte, and reload the data and
;; hashes for the next iteration. Falls through to input_end when
;; f_i >= f_end_i.
224ce89b 447MARK __body_write_lit_bits_ %+ ARCH
7c673cae 448write_lit_bits:
224ce89b 449 MOVDQU xdata, [file_start + f_i + 1]
7c673cae
FG
 ;; r12 was used as dist2 in loop2; restore f_end_i from its stack slot
450 mov f_end_i, [rsp + f_end_i_mem_offset]
451 add f_i, 1
224ce89b 452 mov curr_data, [file_start + f_i]
7c673cae 453
 ;; hash/hash2 for the next iteration come from xhash dwords 0 and 1
224ce89b 454 MOVD hash %+ d, xhash
7c673cae
FG
455
456 write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
457
224ce89b
WB
458 PEXTRD hash2 %+ d, xhash, 1
459
7c673cae
FG
460 ; continue
461 cmp f_i, f_end_i
462 jl loop2
463
;; input_end: the input is consumed up to the look-ahead limit. Select the
;; next state (same rule as the early-out at function entry), then fall into
;; output_end.
224ce89b 464input_end:
7c673cae
FG
465 mov tmp1, ZSTATE_FLUSH_READ_BUFFER
466 mov tmp5, ZSTATE_BODY
467 cmp dword [stream + _end_of_stream], 0
468 cmovne tmp5, tmp1
469 cmp dword [stream + _flush], _NO_FLUSH
470 cmovne tmp5, tmp1
471 mov dword [stream + _internal_state_state], tmp5 %+ d
224ce89b
WB
472
;; output_end: common exit. Also reached directly (state left unchanged) when
;; m_out_buf has passed m_out_end. r12 must hold f_end_i here; every visible
;; path into loop2/input_end either still has it from setup or reloads it
;; from the stack slot.
473output_end:
474 ;; update input buffer
 ;; undo "f_end_i -= LA"; avail_in = (f_end_i + LA) - f_i
475 add f_end_i, LA
476 mov [stream + _total_in], f_i %+ d
477 add file_start, f_i
478 mov [stream + _next_in], file_start
479 sub f_end_i, f_i
480 mov [stream + _avail_in], f_end_i %+ d
481
482 ;; update output buffer
 ;; bytes written = m_out_buf - m_out_start
7c673cae 483 mov [stream + _next_out], m_out_buf
7c673cae
FG
484 sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
485 sub [stream + _avail_out], m_out_buf %+ d
486 add [stream + _total_out], m_out_buf %+ d
487
488 mov [stream + _internal_state_bitbuf_m_bits], m_bits
489 mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
490
7c673cae
FG
 ;; restore the GPRs saved in the prologue
491 mov rbx, [rsp + gpr_save_mem_offset + 0*8]
492 mov rsi, [rsp + gpr_save_mem_offset + 1*8]
493 mov rdi, [rsp + gpr_save_mem_offset + 2*8]
494 mov rbp, [rsp + gpr_save_mem_offset + 3*8]
495 mov r12, [rsp + gpr_save_mem_offset + 4*8]
496 mov r13, [rsp + gpr_save_mem_offset + 5*8]
497 mov r14, [rsp + gpr_save_mem_offset + 6*8]
498 mov r15, [rsp + gpr_save_mem_offset + 7*8]
7c673cae
FG
499
 ;; With ALIGN_STACK, the rbp value just restored is the frame pointer set
 ;; in the prologue, so rsp is recovered from it before the caller's rbp
 ;; is popped.
500%ifndef ALIGN_STACK
501 add rsp, stack_size
502%else
503 mov rsp, rbp
504 pop rbp
505%endif
506 ret
507
;; Long-match paths, taken when all 8 compared bytes were equal. The match
;; length is computed by one of the compare250* macros (defined outside this
;; file), selected at assembly time by COMPARE_TYPE.
224ce89b 508MARK __body_compare_loops_ %+ ARCH
7c673cae 509compare_loop:
224ce89b
WB
 ;; This path branched away before xhash was built, so build it here:
 ;; xhash = { tmp6, tmp2 } & xmask
510 MOVD xhash, tmp6 %+ d
511 PINSRD xhash, tmp2 %+ d, 1
512 PAND xhash, xhash, xmask
 ;; tmp2 = address of the candidate match for the first position
513 lea tmp2, [tmp1 + dist - 1]
7c673cae
FG
514%if (COMPARE_TYPE == 1)
515 compare250 tmp1, tmp2, len, tmp3
516%elif (COMPARE_TYPE == 2)
517 compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1
518%elif (COMPARE_TYPE == 3)
519 compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1
520%else
521 %error Unknown Compare type COMPARE_TYPE
522 % error
523%endif
524 jmp len_dist_huffman
525
526compare_loop2:
224ce89b
WB
 ;; tmp2 = candidate address for the second position; tmp1 is advanced to
 ;; the second input byte
527 lea tmp2, [tmp1 + dist2]
528 add tmp1, 1
7c673cae
FG
529%if (COMPARE_TYPE == 1)
530 compare250 tmp1, tmp2, len2, tmp3
531%elif (COMPARE_TYPE == 2)
532 compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1
533%elif (COMPARE_TYPE == 3)
534 compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1
535%else
536%error Unknown Compare type COMPARE_TYPE
537 % error
538%endif
 ;; the first byte is still emitted as a literal: fetch its code, with the
 ;; length going to code_len3 as len_dist_lit_huffman expects
539 and curr_data, 0xff
540 get_lit_code curr_data, code3, code_len3, hufftables
541 jmp len_dist_lit_huffman
542
;; write_first_byte: taken when has_hist == 0. Records the first position in
;; the head table, sets has_hist, and emits the first byte as a literal via
;; write_lit_bits without searching for a match.
543MARK __write_first_byte_ %+ ARCH
544write_first_byte:
 ;; same output-full check as at the top of loop2
545 cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
224ce89b
WB
546 ja output_end
547
548 mov dword [stream + _internal_state_has_hist], 1
7c673cae 549
7c673cae 550 mov [stream + _internal_state_head + 2 * hash], f_i %+ w
224ce89b
WB
551
 ;; shift the hash pipeline by one position: hash <- hash2, and hash2 is
 ;; recomputed from the initial data (tmp6) shifted right by two bytes
552 mov hash, hash2
553 shr tmp6, 16
554 compute_hash hash2, tmp6
555
 ;; xhash = { hash, hash2 } & xmask, as write_lit_bits expects
556 MOVD xhash, hash %+ d
557 PINSRD xhash, hash2 %+ d, 1
558 PAND xhash, xhash, xmask
559
7c673cae
FG
560 and curr_data, 0xff
561 get_lit_code curr_data, code2, code_len2, hufftables
562 jmp write_lit_bits
563
;; Constants. mask is loaded into xmask by the routine above and holds
;; HASH_MASK in each of its four dwords.
564section .data
224ce89b
WB
565 align 16
566mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK
;; NOTE(review): const_D is not referenced by the code visible in this file.
7c673cae 567const_D: dq D