]> git.proxmox.com Git - ceph.git/blame - ceph/src/isa-l/igzip/igzip_decode_block_stateless.asm
Import ceph 15.2.8
[ceph.git] / ceph / src / isa-l / igzip / igzip_decode_block_stateless.asm
CommitLineData
f91f0fd5
TL
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
3;
4; Redistribution and use in source and binary forms, with or without
5; modification, are permitted provided that the following conditions
6; are met:
7; * Redistributions of source code must retain the above copyright
8; notice, this list of conditions and the following disclaimer.
9; * Redistributions in binary form must reproduce the above copyright
10; notice, this list of conditions and the following disclaimer in
11; the documentation and/or other materials provided with the
12; distribution.
13; * Neither the name of Intel Corporation nor the names of its
14; contributors may be used to endorse or promote products derived
15; from this software without specific prior written permission.
16;
17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
224ce89b
WB
30default rel
31
32%include "reg_sizes.asm"
33
;; Status codes handed back in rax. The exit paths visible in this file load
;; END_INPUT, OUT_OVERFLOW, INVALID_SYMBOL and INVALID_LOOKBACK; the success
;; path produces 0 with "xor rax, rax" instead of naming DECOMP_OK.
;; INVALID_BLOCK is not referenced in this file.
34%define DECOMP_OK 0
35%define END_INPUT 1
36%define OUT_OVERFLOW 2
37%define INVALID_BLOCK -1
38%define INVALID_SYMBOL -2
39%define INVALID_LOOKBACK -3
40
;; Number of low bits of read_in used to index the first-level lookup:
;; LONG_BITS for the literal/length table, SHORT_BITS for the distance table.
41%define ISAL_DECODE_LONG_BITS 12
42%define ISAL_DECODE_SHORT_BITS 10
43
224ce89b
WB
;; COPY_SIZE is the number of bytes moved per MOVDQU in the fast copy loops.
;; COPY_LEN_MAX is presumably the largest DEFLATE match length (258) - confirm
;; against RFC 1951.
44%define COPY_SIZE 16
45%define COPY_LEN_MAX 258
46
;; Margins subtracted from end_in / end_out while the fast loop runs, so its
;; 8-byte input reads and 16-byte output stores may overshoot the logical
;; position without per-byte checks.
;; NOTE: OUT_BUFFER_SLOP expands to an unparenthesised sum; that is fine as a
;; bare add/sub operand (its only use here) but not inside a product.
47%define IN_BUFFER_SLOP 8
48%define OUT_BUFFER_SLOP COPY_SIZE + COPY_LEN_MAX
49
50%include "inflate_data_structs.asm"
51%include "stdmac.asm"
52
53extern rfc1951_lookup_table
54
f91f0fd5
TL
55
56
;; Entry layout of the literal/length ("LARGE") tables as used by
;; decode_next_lit_len:
;;   short (first-level) entry, loaded as a dword:
;;     bits  0..24  symbol payload (or index hint when the flag bit is set)
;;     bit   25     flag: entry is a hint into the long table
;;     bits 26..27  number of symbols packed in the payload
;;     bits 28..    code length in bits (0 means invalid)
;;   long (second-level) entry, loaded as a word:
;;     bits  0..9   symbol
;;     bits 10..    code length in bits (0 means invalid)
;; LARGE_SHORT_MAX_LEN_OFFSET is not referenced in this file.
57%define LARGE_SHORT_SYM_LEN 25
58%define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
59%define LARGE_LONG_SYM_LEN 10
60%define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
61%define LARGE_SHORT_CODE_LEN_OFFSET 28
62%define LARGE_LONG_CODE_LEN_OFFSET 10
63%define LARGE_FLAG_BIT_OFFSET 25
64%define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
65%define LARGE_SYM_COUNT_OFFSET 26
66%define LARGE_SYM_COUNT_LEN 2
67%define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
68%define LARGE_SHORT_MAX_LEN_OFFSET 26
69
;; Entry layout of the distance ("SMALL") tables as used by decode_next_dist.
;; Both levels are loaded as words:
;;   short entry: bits 0..8 symbol/hint, bit 10 flag, bits 11.. code length
;;   long entry:  bits 0..8 symbol,                   bits 10.. code length
70%define SMALL_SHORT_SYM_LEN 9
71%define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
72%define SMALL_LONG_SYM_LEN 9
73%define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
74%define SMALL_SHORT_CODE_LEN_OFFSET 11
75%define SMALL_LONG_CODE_LEN_OFFSET 10
76%define SMALL_FLAG_BIT_OFFSET 10
77%define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)
78
;; The decoded 9-bit distance field is split again by decode_next_dist:
;; bits 0..4 are the distance symbol and the bits from DIST_SYM_EXTRA_OFFSET
;; upward are the extra-bit count. Only DIST_SYM_MASK and DIST_SYM_EXTRA_OFFSET
;; are referenced in this file.
79%define DIST_SYM_OFFSET 0
80%define DIST_SYM_LEN 5
81%define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1)
82%define DIST_SYM_EXTRA_OFFSET 5
83%define DIST_SYM_EXTRA_LEN 4
84%define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)
85
224ce89b
WB
;; Register allocation. Several aliases name the SAME physical register, so
;; only one of them can be live at a time; check this list before reordering
;; any instruction in the decoder.
86;; rax
87%define tmp3 rax
88%define read_in_2 rax
89%define look_back_dist rax
90
;; rcx has no alias: it is the implicit shift/count register used by the
;; decode macros and by rep movsb.
91;; rcx
92;; rdx arg3
93%define next_sym2 rdx
94%define copy_start rdx
95%define tmp4 rdx
96
97;; rdi arg1
98%define tmp1 rdi
99%define look_back_dist2 rdi
100%define next_bits2 rdi
101%define next_sym3 rdi
102
103;; rsi arg2
104%define tmp2 rsi
f91f0fd5 105%define next_sym_num rsi
224ce89b
WB
106%define next_bits rsi
107
108;; rbx ; Saved
109%define next_in rbx
110
111;; rbp ; Saved
112%define end_in rbp
113
114;; r8
115%define repeat_length r8
116
117;; r9
118%define read_in r9
119
120;; r10
121%define read_in_length r10
122
123;; r11
124%define state r11
125
126;; r12 ; Saved
127%define next_out r12
128
129;; r13 ; Saved
130%define end_out r13
131
132;; r14 ; Saved
133%define next_sym r14
134
135;; r15 ; Saved
136%define rfc_lookup r15
137
;; Stack frame, as byte offsets from rsp after FUNC_SAVE:
;;    0  second argument (lowest address a look-back may read from)
;;    8  read_in snapshot        \
;;   16  read_in_length snapshot  > restored on the end_of_input exit
;;   24  next_out snapshot       /
;;   32  callee-saved GPR area (8 qword slots; the elf64 macros use 6 of them)
138start_out_mem_offset equ 0
139read_in_mem_offset equ 8
140read_in_length_mem_offset equ 16
f91f0fd5
TL
141next_out_mem_offset equ 24
142gpr_save_mem_offset equ 32
143stack_size equ 4 * 8 + 8 * 8
224ce89b
WB
144
;; Byte offsets into rfc1951_lookup_table. Only _dist_start is referenced in
;; this file (indexed with a 4-byte stride).
;; NOTE(review): the table layout itself is defined elsewhere - confirm these
;; offsets against that definition before changing them.
;; The defines expand to unparenthesised sums; safe only in additive contexts.
145%define _dist_extra_bit_count 264
146%define _dist_start _dist_extra_bit_count + 1*32
147%define _len_extra_bit_count _dist_start + 4*32
148%define _len_start _len_extra_bit_count + 1*32
149
;; ELF64 build: the first two arguments arrive in rdi and rsi.
150%ifidn __OUTPUT_FORMAT__, elf64
151%define arg0 rdi
f91f0fd5 152%define arg1 rsi
224ce89b
WB
153
;; FUNC_SAVE (elf64): reserve stack_size bytes and spill rbx, rbp, r12-r15 into
;; the GPR save area. With ALIGN_STACK defined, the caller's rbp is pushed
;; first, rbp becomes the frame anchor and rsp is rounded down to 16 bytes;
;; the rbp value written to slot 1 is then that anchor, not the caller's rbp.
154%macro FUNC_SAVE 0
155%ifdef ALIGN_STACK
156 push rbp
157 mov rbp, rsp
158 sub rsp, stack_size
159 and rsp, ~15
160%else
161 sub rsp, stack_size
162%endif
163
164 mov [rsp + gpr_save_mem_offset + 0*8], rbx
165 mov [rsp + gpr_save_mem_offset + 1*8], rbp
166 mov [rsp + gpr_save_mem_offset + 2*8], r12
167 mov [rsp + gpr_save_mem_offset + 3*8], r13
168 mov [rsp + gpr_save_mem_offset + 4*8], r14
169 mov [rsp + gpr_save_mem_offset + 5*8], r15
170%endm
171
;; FUNC_RESTORE (elf64): exact inverse of FUNC_SAVE. In the ALIGN_STACK variant
;; the reloaded rbp is the frame anchor, which is what lets "mov rsp, rbp /
;; pop rbp" undo the alignment and recover the caller's rbp.
172%macro FUNC_RESTORE 0
173 mov rbx, [rsp + gpr_save_mem_offset + 0*8]
174 mov rbp, [rsp + gpr_save_mem_offset + 1*8]
175 mov r12, [rsp + gpr_save_mem_offset + 2*8]
176 mov r13, [rsp + gpr_save_mem_offset + 3*8]
177 mov r14, [rsp + gpr_save_mem_offset + 4*8]
178 mov r15, [rsp + gpr_save_mem_offset + 5*8]
179
180%ifndef ALIGN_STACK
181 add rsp, stack_size
182%else
183 mov rsp, rbp
184 pop rbp
185%endif
186%endm
187%endif
188
;; Win64 build: the first two arguments arrive in rcx and rdx.
189%ifidn __OUTPUT_FORMAT__, win64
190%define arg0 rcx
f91f0fd5
TL
191%define arg1 rdx
192
224ce89b
WB
;; FUNC_SAVE (win64): same frame as the elf64 variant, but rsi and rdi are
;; spilled as well, so all eight slots of the GPR save area are used.
;; With ALIGN_STACK defined, the rbp written to slot 3 is the frame anchor set
;; by "mov rbp, rsp"; the caller's rbp lives in the pushed stack slot.
193%macro FUNC_SAVE 0
194%ifdef ALIGN_STACK
195 push rbp
196 mov rbp, rsp
197 sub rsp, stack_size
198 and rsp, ~15
199%else
200 sub rsp, stack_size
201%endif
202
203 mov [rsp + gpr_save_mem_offset + 0*8], rbx
204 mov [rsp + gpr_save_mem_offset + 1*8], rsi
205 mov [rsp + gpr_save_mem_offset + 2*8], rdi
206 mov [rsp + gpr_save_mem_offset + 3*8], rbp
207 mov [rsp + gpr_save_mem_offset + 4*8], r12
208 mov [rsp + gpr_save_mem_offset + 5*8], r13
209 mov [rsp + gpr_save_mem_offset + 6*8], r14
210 mov [rsp + gpr_save_mem_offset + 7*8], r15
211%endm
212
;; FUNC_RESTORE (win64): reload the eight spilled registers, then release the
;; frame (plain add, or via the rbp anchor when ALIGN_STACK is defined).
213%macro FUNC_RESTORE 0
214 mov rbx, [rsp + gpr_save_mem_offset + 0*8]
215 mov rsi, [rsp + gpr_save_mem_offset + 1*8]
216 mov rdi, [rsp + gpr_save_mem_offset + 2*8]
217 mov rbp, [rsp + gpr_save_mem_offset + 3*8]
218 mov r12, [rsp + gpr_save_mem_offset + 4*8]
219 mov r13, [rsp + gpr_save_mem_offset + 5*8]
220 mov r14, [rsp + gpr_save_mem_offset + 6*8]
221 mov r15, [rsp + gpr_save_mem_offset + 7*8]
222
223%ifndef ALIGN_STACK
224 add rsp, stack_size
225%else
226 mov rsp, rbp
227 pop rbp
228%endif
229%endm
230%endif
231
232;; Load read_in and update in_buffer accordingly
233;; when there are at least 8 bytes in the in buffer
234;; Clobbers rcx, unless rcx is %%read_in_length
;; (no instruction below names rcx directly; the clobber presumably comes from
;;  the SHLX helper macro defined outside this file - confirm)
;;
;; Reads 8 bytes at [%%next_in], shifts them left by %%read_in_length and ORs
;; them into %%read_in, then advances %%next_in by (64 - read_in_length) / 8
;; bytes and grows %%read_in_length by eight times that amount.
;; %%end_in (%2) and %%tmp2 (%6) are accepted but never used, and the %%end
;; label is never branched to.
235%macro inflate_in_load 6
236%define %%next_in %1
237%define %%end_in %2
238%define %%read_in %3
239%define %%read_in_length %4
240%define %%tmp1 %5 ; Tmp registers
241%define %%tmp2 %6
242
;; Merge the next 8 input bytes above the bits already buffered
243 SHLX %%tmp1, [%%next_in], %%read_in_length
244 or %%read_in, %%tmp1
245
;; %%tmp1 = number of whole bytes that fit in the free part of read_in
246 mov %%tmp1, 64
247 sub %%tmp1, %%read_in_length
248 shr %%tmp1, 3
249
250 add %%next_in, %%tmp1
251 lea %%read_in_length, [%%read_in_length + 8 * %%tmp1]
252%%end:
253%endm
254
255;; Load read_in and update in_buffer accordingly
256;; Clobbers rcx, unless rcx is %%read_in_length
;;
;; Byte-at-a-time refill for use near the end of the input: loads
;; min((64 - read_in_length) / 8, end_in - next_in) bytes, one per iteration,
;; and never reads at or past %%end_in.
;; %%avail_in and %%tmp1 are both %5: the available-byte count is consumed by
;; the cmovg below before the register is reused as the per-byte temporary.
257%macro inflate_in_small_load 6
258%define %%next_in %1
259%define %%end_in %2
260%define %%read_in %3
261%define %%read_in_length %4
262%define %%avail_in %5 ; Tmp registers
263%define %%tmp1 %5
264%define %%loop_count %6
265
;; Bytes still available in the input buffer
266 mov %%avail_in, %%end_in
267 sub %%avail_in, %%next_in
268
;; rcx carries the running bit position for the shifts in the loop
269%ifnidn %%read_in_length, rcx
270 mov rcx, %%read_in_length
271%endif
272
;; Whole bytes of free space in read_in
273 mov %%loop_count, 64
274 sub %%loop_count, %%read_in_length
275 shr %%loop_count, 3
276
;; loop_count = min(free bytes, available bytes) using a signed compare;
;; nothing to do when it is zero
277 cmp %%loop_count, %%avail_in
278 cmovg %%loop_count, %%avail_in
279 cmp %%loop_count, 0
280 je %%end
281
282%%load_byte:
;; Zero-extend one input byte, shift it to bit position rcx, merge it in
283 xor %%tmp1, %%tmp1
284 mov %%tmp1 %+ b, byte [%%next_in]
285 SHLX %%tmp1, %%tmp1, rcx
286 or %%read_in, %%tmp1
287 add rcx, 8
288 add %%next_in, 1
289 sub %%loop_count, 1
290 jg %%load_byte
;; Publish the updated bit count (skipped on the early-out path above, where
;; it did not change)
291%ifnidn %%read_in_length, rcx
292 mov %%read_in_length, rcx
293%endif
294%%end:
295%endm
296
f91f0fd5
TL
297;; Clears all bits at index %%bit_count and above in %%next_bits
298;; May clobber rcx and %%bit_count
;;
;; %%bit_count is first reduced by 0x40 + %%lookup_size, so the number of low
;; bits actually kept in %%next_bits is (%%bit_count - %%lookup_size) taken
;; modulo the shift width: 32 on the bzhi path (explicit "and 0x1F"), 64 on the
;; shl/shr path (the shift count is -(bit_count - 0x40 - lookup_size), which
;; the 64-bit shifts use modulo 64).
299%macro CLEAR_HIGH_BITS 3
300%define %%next_bits %1
301%define %%bit_count %2
302%define %%lookup_size %3
303
304 sub %%bit_count, 0x40 + %%lookup_size
305;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first DECODE_LOOKUP_SIZE bits.
306%ifdef USE_HSWNI
307 and %%bit_count, 0x1F
308 bzhi %%next_bits, %%next_bits, %%bit_count
309%else
310%ifnidn %%bit_count, rcx
311 mov rcx, %%bit_count
312%endif
;; Shift left then right by the same count to drop the unwanted high bits
313 neg rcx
314 shl %%next_bits, cl
315 shr %%next_bits, cl
316%endif
317
318%endm
319
224ce89b
WB
320;; Decode next symbol
321;; Clobber rcx
;;
;; Input:  %%next_sym already holds the first-level table entry selected by the
;;         low %%lookup_size bits of %%read_in (the caller does the load).
;; Output: %%next_sym     = decoded symbol payload (up to 25 bits on the short
;;                          path, 10 bits on the long path)
;;         %%next_sym_num = symbol count taken from the entry, or 1 on the
;;                          long path
;;         %%read_in / %%read_in_length advanced past the code (rcx = length)
;; Jumps to the global label invalid_symbol when a code-length field is zero.
;; %%read_in_length may go negative; callers test for that themselves.
f91f0fd5 322%macro decode_next_lit_len 8
224ce89b
WB
323%define %%state %1 ; State structure associated with compressed stream
324%define %%lookup_size %2 ; Number of bits used for small lookup
f91f0fd5 325%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST
224ce89b
WB
326%define %%read_in %4 ; Bits read in from compressed stream
327%define %%read_in_length %5 ; Number of valid bits in read_in
f91f0fd5
TL
328%define %%next_sym %6 ; Returned symbols
329%define %%next_sym_num %7 ; Returned symbols count
330%define %%next_bits %8
224ce89b 331
;; rcx = code length field of the short entry; zero marks an invalid code
f91f0fd5 332 mov %%next_sym_num, %%next_sym
224ce89b 333 mov rcx, %%next_sym
f91f0fd5 334 shr rcx, LARGE_SHORT_CODE_LEN_OFFSET
224ce89b
WB
335 jz invalid_symbol
336
;; Isolate the symbol-count field
f91f0fd5
TL
337 and %%next_sym_num, LARGE_SYM_COUNT_MASK << LARGE_SYM_COUNT_OFFSET
338 shr %%next_sym_num, LARGE_SYM_COUNT_OFFSET
339
224ce89b 340 ;; Check if symbol or hint was looked up
f91f0fd5
TL
341 and %%next_sym, LARGE_FLAG_BIT | LARGE_SHORT_SYM_MASK
342 test %%next_sym, LARGE_FLAG_BIT
343 jz %%end
224ce89b 344
;; Hint path: rebuild a bit count from the length field and the count field
;; (rcx = length << 2 | count), i.e. everything stored from bit 26 upward
f91f0fd5
TL
345 shl rcx, LARGE_SYM_COUNT_LEN
346 or rcx, %%next_sym_num
224ce89b 347
f91f0fd5
TL
348 ;; Save length associated with symbol
349 mov %%next_bits, %%read_in
350 shr %%next_bits, %%lookup_size
351
352 ;; Extract the bits beyond the first %%lookup_size bits.
353 CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size
224ce89b 354
;; Long-table index = hint payload + the extra input bits
f91f0fd5
TL
355 and %%next_sym, LARGE_SHORT_SYM_MASK
356 add %%next_sym, %%next_bits
224ce89b
WB
357
358 ;; Lookup actual next symbol
;; The long table starts right after the (1 << %%lookup_size) short entries
f91f0fd5
TL
359 movzx %%next_sym, word [%%state + LARGE_LONG_CODE_SIZE * %%next_sym + %%state_offset + LARGE_SHORT_CODE_SIZE * (1 << %%lookup_size)]
360 mov %%next_sym_num, 1
224ce89b
WB
361
362 ;; Save length associated with symbol
363 mov rcx, %%next_sym
f91f0fd5 364 shr rcx, LARGE_LONG_CODE_LEN_OFFSET
224ce89b 365 jz invalid_symbol
f91f0fd5
TL
366 and %%next_sym, LARGE_LONG_SYM_MASK
367
224ce89b 368%%end:
f91f0fd5 369;; Update read_in to reflect the bits which were decoded
224ce89b 370 SHRX %%read_in, %%read_in, rcx
f91f0fd5 371 sub %%read_in_length, rcx
224ce89b
WB
372%endm
373
f91f0fd5
TL
374;; Decode next symbol
375;; Clobber rcx
;;
;; Convenience wrapper: performs the first-level table load that
;; decode_next_lit_len expects its caller to have done, then expands that
;; macro. %%next_bits is used as the index temporary and is clobbered.
376%macro decode_next_lit_len_with_load 8
377%define %%state %1 ; State structure associated with compressed stream
378%define %%lookup_size %2 ; Number of bits used for small lookup
379%define %%state_offset %3
380%define %%read_in %4 ; Bits read in from compressed stream
381%define %%read_in_length %5 ; Number of valid bits in read_in
382%define %%next_sym %6 ; Returned symbols
383%define %%next_sym_num %7 ; Returned symbols count
384%define %%next_bits %8
385
386 ;; Lookup possible next symbol
;; Index = low %%lookup_size bits of read_in; entries are dwords
387 mov %%next_bits, %%read_in
388 and %%next_bits, (1 << %%lookup_size) - 1
389 mov %%next_sym %+ d, dword [%%state + %%state_offset + LARGE_SHORT_CODE_SIZE * %%next_bits]
390
391 decode_next_lit_len %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_sym_num, %%next_bits
392%endm
224ce89b
WB
393
394;; Decode next symbol
395;; Clobber rcx
;;
;; Input:  %%next_sym already holds the first-level distance table entry.
;; Output: %%next_sym = distance symbol (low DIST_SYM_LEN bits)
;;         rcx        = the decoded field shifted right by
;;                      DIST_SYM_EXTRA_OFFSET (used by callers as the number of
;;                      extra bits to read)
;;         %%read_in / %%read_in_length advanced past the code.
;; %%next_extra_bits (%7) is accepted but never referenced in the body; the
;; result is always left in rcx.
;; On a zero code length this jumps to a label built by pasting the REGISTER
;; NAME passed as %%next_sym onto "invalid_dist_symbol_", so the file must
;; define one such label per register the macro is instantiated with.
f91f0fd5 396%macro decode_next_dist 8
224ce89b
WB
397%define %%state %1 ; State structure associated with compressed stream
398%define %%lookup_size %2 ; Number of bits used for small lookup
399%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST
400%define %%read_in %4 ; Bits read in from compressed stream
401%define %%read_in_length %5 ; Number of valid bits in read_in
402%define %%next_sym %6 ; Returned symbol
f91f0fd5
TL
403%define %%next_extra_bits %7
404%define %%next_bits %8
224ce89b
WB
405
;; rcx = code length field of the short entry; zero marks an invalid code
406 mov rcx, %%next_sym
f91f0fd5
TL
407 shr rcx, SMALL_SHORT_CODE_LEN_OFFSET
408 jz invalid_dist_symbol_ %+ %%next_sym
224ce89b
WB
409
410 ;; Check if symbol or hint was looked up
f91f0fd5
TL
411 and %%next_sym, SMALL_FLAG_BIT | SMALL_SHORT_SYM_MASK
412 test %%next_sym, SMALL_FLAG_BIT
413 jz %%end
414
415 ;; Save length associated with symbol
416 mov %%next_bits, %%read_in
417 shr %%next_bits, %%lookup_size
224ce89b
WB
418
419 ;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits.
;; %%next_sym still has SMALL_FLAG_BIT set here; the load below cancels it
;; with the "- SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT" displacement term
f91f0fd5 420 lea %%next_sym, [%%state + SMALL_LONG_CODE_SIZE * %%next_sym]
224ce89b 421
f91f0fd5 422 CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size
224ce89b
WB
423
424 ;; Lookup actual next symbol
f91f0fd5 425 movzx %%next_sym, word [%%next_sym + %%state_offset + SMALL_LONG_CODE_SIZE * %%next_bits + SMALL_SHORT_CODE_SIZE * (1 << %%lookup_size) - SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT]
224ce89b
WB
426
427 ;; Save length associated with symbol
428 mov rcx, %%next_sym
f91f0fd5
TL
429 shr rcx, SMALL_LONG_CODE_LEN_OFFSET
430 jz invalid_dist_symbol_ %+ %%next_sym
431 and %%next_sym, SMALL_SHORT_SYM_MASK
224ce89b
WB
432
433%%end:
434 ;; Update read_in to reflect the bits which were decoded
435 SHRX %%read_in, %%read_in, rcx
436 sub %%read_in_length, rcx
;; Split the decoded field: rcx = upper part, %%next_sym = low 5 bits
f91f0fd5
TL
437 mov rcx, %%next_sym
438 shr rcx, DIST_SYM_EXTRA_OFFSET
439 and %%next_sym, DIST_SYM_MASK
440%endm
441
442;; Decode next symbol
443;; Clobber rcx
;;
;; Convenience wrapper: performs the first-level distance table load (word
;; entries, indexed by the low %%lookup_size bits of read_in) and then expands
;; decode_next_dist. %%next_bits is used as the index temporary and clobbered.
444%macro decode_next_dist_with_load 8
445%define %%state %1 ; State structure associated with compressed stream
446%define %%lookup_size %2 ; Number of bits used for small lookup
447%define %%state_offset %3
448%define %%read_in %4 ; Bits read in from compressed stream
449%define %%read_in_length %5 ; Number of valid bits in read_in
450%define %%next_sym %6 ; Returned symbol
451%define %%next_extra_bits %7
452%define %%next_bits %8
453
454 ;; Lookup possible next symbol
455 mov %%next_bits, %%read_in
456 and %%next_bits, (1 << %%lookup_size) - 1
457 movzx %%next_sym, word [%%state + %%state_offset + SMALL_SHORT_CODE_SIZE * %%next_bits]
458
459 decode_next_dist %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_extra_bits, %%next_bits
224ce89b
WB
460%endm
461
;;-----------------------------------------------------------------------------
;; decode_huffman_code_block_stateless_<ARCH>
;;   arg0 = pointer to the inflate state structure (fields accessed via the
;;          _xxx offsets, defined outside this file)
;;   arg1 = lowest output address a look-back copy may read from
;;   rax  = 0 on end-of-block, END_INPUT, OUT_OVERFLOW, INVALID_SYMBOL or
;;          INVALID_LOOKBACK
;;
;; Structure:
;;   1. loop_block      - fast path, runs only while both buffers keep a slop
;;                        margin; uses speculative 8-byte stores and 16-byte
;;                        MOVDQU copies that may overshoot.
;;   2. end_loop_block  - careful path for the buffer tails; byte-wise input
;;                        loads, exact bounds checks, rep movsb copies.
;;   3. end             - writes the buffer positions back into the state.
;; SHLX / SHRX / BZHI / MOVDQU in upper case are helper macros defined outside
;; this file (presumably in the included .asm headers - confirm).
;;-----------------------------------------------------------------------------
462global decode_huffman_code_block_stateless_ %+ ARCH
463decode_huffman_code_block_stateless_ %+ ARCH %+ :
464
465 FUNC_SAVE
466
;; Capture both arguments before rdi/rsi/rcx/rdx are reused as temporaries
467 mov state, arg0
f91f0fd5 468 mov [rsp + start_out_mem_offset], arg1
224ce89b
WB
469 lea rfc_lookup, [rfc1951_lookup_table]
470
;; Load the stream state; end_out / end_in become one-past-the-end pointers
471 mov read_in,[state + _read_in]
472 mov read_in_length %+ d, dword [state + _read_in_length]
473 mov next_out, [state + _next_out]
474 mov end_out %+ d, dword [state + _avail_out]
475 add end_out, next_out
476 mov next_in, [state + _next_in]
477 mov end_in %+ d, dword [state + _avail_in]
478 add end_in, next_in
479
480 mov dword [state + _copy_overflow_len], 0
481 mov dword [state + _copy_overflow_dist], 0
482
224ce89b
WB
;; Pull both limits in by the slop so the fast loop may overshoot safely
483 sub end_out, OUT_BUFFER_SLOP
484 sub end_in, IN_BUFFER_SLOP
485
;; Not enough input for the fast loop: go straight to the careful path
486 cmp next_in, end_in
487 jg end_loop_block_pre
488
;; A completely full bit buffer needs no refill
489 cmp read_in_length, 64
490 je skip_load
491
492 inflate_in_load next_in, end_in, read_in, read_in_length, tmp1, tmp2
493
494skip_load:
;; Preload the first lit/len table entry; loop_block expects it in next_sym
495 mov tmp3, read_in
496 and tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
f91f0fd5 497 mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]
224ce89b
WB
498
499;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
500; Main Loop
501;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
502loop_block:
503 ;; Check if near end of in buffer or out buffer
504 cmp next_in, end_in
505 jg end_loop_block_pre
506 cmp next_out, end_out
507 jg end_loop_block_pre
508
509 ;; Decode next symbol and reload the read_in buffer
f91f0fd5 510 decode_next_lit_len state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1
224ce89b 511
f91f0fd5
TL
512 ;; Speculatively write next_sym if it is a literal
;; A full 8-byte store; next_out then advances by the symbol count only.
;; next_sym2 = next_sym >> 8 * (count - 1), i.e. the last symbol of the group
513 mov [next_out], next_sym
514 add next_out, next_sym_num
515 lea next_sym2, [8 * next_sym_num - 8]
516 SHRX next_sym2, next_sym, next_sym2
224ce89b
WB
517
518 ;; Find index to speculatively preload next_sym from
f91f0fd5
TL
519 mov tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
520 and tmp3, read_in
224ce89b
WB
521
522 ;; Start reloading read_in
;; (8-byte read at next_in; the pointer/count update follows further down)
523 mov tmp1, [next_in]
524 SHLX tmp1, tmp1, read_in_length
525 or read_in, tmp1
526
527 ;; Speculatively load data associated with length symbol
;; repeat_length = symbol - 254; only meaningful when next_sym2 > 256
f91f0fd5 528 lea repeat_length, [next_sym2 - 254]
224ce89b
WB
529
530 ;; Test for end of block symbol
531 cmp next_sym2, 256
532 je end_symbol_pre
533
534 ;; Speculatively load next_sym for next loop if a literal was decoded
f91f0fd5 535 mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]
224ce89b
WB
536
537 ;; Finish updating read_in_length for read_in
538 mov tmp1, 64
539 sub tmp1, read_in_length
540 shr tmp1, 3
541 add next_in, tmp1
542 lea read_in_length, [read_in_length + 8 * tmp1]
543
544 ;; Speculatively load next dist code
;; next_bits2 and next_sym3 are both rdi: the index is consumed by the load
f91f0fd5
TL
545 mov next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1
546 and next_bits2, read_in
547 movzx next_sym3, word [state + _dist_huff_code + SMALL_SHORT_CODE_SIZE * next_bits2]
224ce89b
WB
548
549 ;; Check if next_sym2 is a literal, length, or end of block symbol
550 cmp next_sym2, 256
551 jl loop_block
552
553decode_len_dist:
f91f0fd5
TL
554 ;; Determine next_out after the copy is finished
;; (the "- 1" undoes the slot counted for the length symbol by the
;;  speculative advance above)
555 lea next_out, [next_out + repeat_length - 1]
224ce89b
WB
556
557 ;; Decode distance code
;; Leaves the distance symbol in next_sym3 and the extra-bit count in rcx
f91f0fd5 558 decode_next_dist state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym3, rcx, tmp2
224ce89b 559
224ce89b
WB
;; look_back_dist2 aliases next_sym3 (rdi): the symbol is replaced by the
;; 32-bit table value it selects
560 mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3]
561
f91f0fd5
TL
562 ; ;; Load distance code extra bits
563 mov next_bits, read_in
224ce89b
WB
564
565 ;; Calculate the look back distance
566 BZHI next_bits, next_bits, rcx, tmp4
f91f0fd5 567 SHRX read_in, read_in, rcx
224ce89b
WB
568
569 ;; Setup next_sym, read_in, and read_in_length for next loop
f91f0fd5
TL
570 mov read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1
571 and read_in_2, read_in
572 mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * read_in_2]
224ce89b
WB
573 sub read_in_length, rcx
574
575 ;; Copy distance in len/dist pair
576 add look_back_dist2, next_bits
577
578 ;; Find beginning of copy
;; copy_start = (end of copy) - length - distance = first source byte
579 mov copy_start, next_out
580 sub copy_start, repeat_length
581 sub copy_start, look_back_dist2
582
583 ;; Check if a valid look back distances was decoded
;; (signed pointer compare against the arg1 value saved at entry)
584 cmp copy_start, [rsp + start_out_mem_offset]
585 jl invalid_look_back_distance
586 MOVDQU xmm1, [copy_start]
587
588 ;; Set tmp2 to be the minimum of COPY_SIZE and repeat_length
589 ;; This is to decrease use of small_byte_copy branch
590 mov tmp2, COPY_SIZE
591 cmp tmp2, repeat_length
592 cmovg tmp2, repeat_length
593
594 ;; Check for overlapping memory in the copy
595 cmp look_back_dist2, tmp2
596 jl small_byte_copy_pre
597
598large_byte_copy:
599 ;; Copy length distance pair when memory overlap is not an issue
;; Destination is copy_start + distance; 16 bytes per pass, and the final
;; pass may write past the match end (inside the output slop)
600 MOVDQU [copy_start + look_back_dist2], xmm1
601
602 sub repeat_length, COPY_SIZE
603 jle loop_block
604
605 add copy_start, COPY_SIZE
606 MOVDQU xmm1, [copy_start]
607 jmp large_byte_copy
608
609small_byte_copy_pre:
610 ;; Copy length distance pair when source and destination overlap
611 add repeat_length, look_back_dist2
612small_byte_copy:
;; Store, double the distance, reload from copy_start; repeat until the
;; distance reaches COPY_SIZE, then hand over to large_byte_copy
613 MOVDQU [copy_start + look_back_dist2], xmm1
614
615 shl look_back_dist2, 1
616 MOVDQU xmm1, [copy_start]
617 cmp look_back_dist2, COPY_SIZE
618 jl small_byte_copy
619
620 sub repeat_length, look_back_dist2
621 jge large_byte_copy
622 jmp loop_block
623
624;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
625; Finish Main Loop
626;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
627end_loop_block_pre:
628 ;; Fix up in buffer and out buffer to reflect the actual buffer end
629 add end_out, OUT_BUFFER_SLOP
630 add end_in, IN_BUFFER_SLOP
631
632end_loop_block:
633 ;; Load read in buffer and decode next lit/len symbol
;; The three stack stores snapshot the position BEFORE this symbol so that
;; end_of_input can roll back a partially decoded symbol
634 inflate_in_small_load next_in, end_in, read_in, read_in_length, tmp1, tmp2
635 mov [rsp + read_in_mem_offset], read_in
636 mov [rsp + read_in_length_mem_offset], read_in_length
f91f0fd5 637 mov [rsp + next_out_mem_offset], next_out
224ce89b 638
f91f0fd5 639 decode_next_lit_len_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1
224ce89b
WB
640
641 ;; Check that enough input was available to decode symbol
642 cmp read_in_length, 0
643 jl end_of_input
644
f91f0fd5
TL
645multi_symbol_start:
;; With more than one symbol pending, the low byte is emitted as a literal
646 cmp next_sym_num, 1
647 jg decode_literal
648
224ce89b
WB
;; Single symbol left: < 256 literal, == 256 end of block, > 256 length
649 cmp next_sym, 256
650 jl decode_literal
651 je end_symbol
652
653decode_len_dist_2:
f91f0fd5 654 lea repeat_length, [next_sym - 254]
224ce89b 655 ;; Decode distance code
f91f0fd5 656 decode_next_dist_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, rcx, tmp1
224ce89b
WB
657
658 ;; Load distance code extra bits
659 mov next_bits, read_in
660 mov look_back_dist %+ d, [rfc_lookup + _dist_start + 4 * next_sym]
224ce89b
WB
661
662 ;; Calculate the look back distance and check for enough input
663 BZHI next_bits, next_bits, rcx, tmp1
664 SHRX read_in, read_in, rcx
665 add look_back_dist, next_bits
666 sub read_in_length, rcx
667 jl end_of_input
668
669 ;; Setup code for byte copy using rep movsb
;; rsi = source (next_out - distance), rdi = destination, rcx = length.
;; rsi/rdi are also tmp2/tmp1 and friends; none of those are live here.
670 mov rsi, next_out
671 mov rdi, rsi
672 mov rcx, repeat_length
673 sub rsi, look_back_dist
674
675 ;; Check if a valid look back distance was decoded
676 cmp rsi, [rsp + start_out_mem_offset]
677 jl invalid_look_back_distance
678
679 ;; Check for out buffer overflow
;; repeat_length is turned into the end-of-copy pointer from here on
680 add repeat_length, next_out
681 cmp repeat_length, end_out
682 jg out_buffer_overflow_repeat
683
684 mov next_out, repeat_length
685
686 rep movsb
687 jmp end_loop_block
688
689decode_literal:
690 ;; Store literal decoded from the input stream
691 cmp next_out, end_out
692 jge out_buffer_overflow_lit
693 add next_out, 1
694 mov byte [next_out - 1], next_sym %+ b
f91f0fd5
TL
;; More symbols pending in this group: shift the next one into the low byte
695 sub next_sym_num, 1
696 jz end_loop_block
697 shr next_sym, 8
698 jmp multi_symbol_start
224ce89b
WB
699
700;; Set exit codes
701end_of_input:
;; Roll back to the snapshot taken before the incomplete symbol
702 mov read_in, [rsp + read_in_mem_offset]
703 mov read_in_length, [rsp + read_in_length_mem_offset]
f91f0fd5
TL
704 mov next_out, [rsp + next_out_mem_offset]
705 xor tmp1, tmp1
706 mov dword [state + _write_overflow_lits], tmp1 %+ d
707 mov dword [state + _write_overflow_len], tmp1 %+ d
224ce89b
WB
708 mov rax, END_INPUT
709 jmp end
710
711out_buffer_overflow_repeat:
;; Copy only the bytes that still fit, then record the unfinished remainder
;; (length and distance) in the state.
;; On entry repeat_length is the would-be end pointer; after the two
;; subtractions it is the number of bytes NOT copied.
712 mov rcx, end_out
713 sub rcx, next_out
714 sub repeat_length, rcx
715 sub repeat_length, next_out
716 rep movsb
717
718 mov [state + _copy_overflow_len], repeat_length %+ d
719 mov [state + _copy_overflow_dist], look_back_dist %+ d
720
721 mov next_out, end_out
722
723 mov rax, OUT_OVERFLOW
724 jmp end
725
726out_buffer_overflow_lit:
;; Record the pending symbols and their count in the state, then inspect the
;; LAST symbol of the group (next_sym >> 8 * (count - 1)).
f91f0fd5
TL
727 mov dword [state + _write_overflow_lits], next_sym %+ d
728 mov dword [state + _write_overflow_len], next_sym_num %+ d
729 sub next_sym_num, 1
730 shl next_sym_num, 3
731 SHRX next_sym, next_sym, next_sym_num
224ce89b 732 mov rax, OUT_OVERFLOW
f91f0fd5
TL
733 shr next_sym_num, 3
734 cmp next_sym, 256
735 jl end
;; The mov below does not change flags, so jg still tests the cmp above.
;; Last symbol is not a literal: record count - 1 instead, then either decode
;; the length/distance pair (> 256) or finish the block (== 256).
736 mov dword [state + _write_overflow_len], next_sym_num %+ d
737 jg decode_len_dist_2
738 jmp end_state
224ce89b
WB
739
740invalid_look_back_distance:
741 mov rax, INVALID_LOOKBACK
742 jmp end
743
;; Error targets for decode_next_dist. The label names are generated by pasting
;; the register behind the alias (next_sym / next_sym3), matching the names the
;; macro builds from its %%next_sym argument.
;; If read_in_length is (signed) below the value left in that register the
;; result is END_INPUT, otherwise INVALID_SYMBOL.
f91f0fd5
TL
744invalid_dist_symbol_ %+ next_sym:
745 cmp read_in_length, next_sym
746 jl end_of_input
747 jmp invalid_symbol
748invalid_dist_symbol_ %+ next_sym3:
749 cmp read_in_length, next_sym3
750 jl end_of_input
224ce89b
WB
751invalid_symbol:
752 mov rax, INVALID_SYMBOL
753 jmp end
754
755end_symbol_pre:
756 ;; Fix up in buffer and out buffer to reflect the actual buffer
;; The end-of-block symbol was counted by the speculative advance; undo it
f91f0fd5 757 sub next_out, 1
224ce89b
WB
758 add end_out, OUT_BUFFER_SLOP
759 add end_in, IN_BUFFER_SLOP
760end_symbol:
f91f0fd5
TL
761 xor rax, rax
762end_state:
224ce89b
WB
763 ;; Set flag identifying a new block is required
;; (replaced by ISAL_BLOCK_INPUT_DONE when the state's bfinal field is
;;  non-zero)
764 mov byte [state + _block_state], ISAL_BLOCK_NEW_HDR
f91f0fd5
TL
765 cmp dword [state + _bfinal], 0
766 je end
767 mov byte [state + _block_state], ISAL_BLOCK_INPUT_DONE
768
224ce89b
WB
769end:
;; NOTE(review): invalid_symbol / invalid_look_back_distance / end_of_input can
;; be reached from inside loop_block, where end_in and end_out still have the
;; slop subtracted; avail_in / avail_out written below are then smaller than
;; the real remaining space by IN_BUFFER_SLOP / OUT_BUFFER_SLOP. Presumably
;; harmless for the two invalid_* error returns - confirm with callers.
770 ;; Save current buffer states
771 mov [state + _read_in], read_in
772 mov [state + _read_in_length], read_in_length %+ d
f91f0fd5
TL
773
774 ;; Set avail_out
224ce89b
WB
775 sub end_out, next_out
776 mov dword [state + _avail_out], end_out %+ d
f91f0fd5
TL
777
778 ;; Set total_out
;; (adds the bytes produced by this call: next_out - previous _next_out)
779 mov tmp1, next_out
780 sub tmp1, [state + _next_out]
781 add [state + _total_out], tmp1 %+ d
782
783 ;; Set next_out
784 mov [state + _next_out], next_out
785
786 ;; Set next_in
224ce89b 787 mov [state + _next_in], next_in
f91f0fd5
TL
788
789 ;; Set avail_in
224ce89b
WB
790 sub end_in, next_in
791 mov [state + _avail_in], end_in %+ d
792
793 FUNC_RESTORE
794
795 ret