]>
Commit | Line | Data |
---|---|---|
f91f0fd5 TL |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2018 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
5 | ; modification, are permitted provided that the following conditions | |
6 | ; are met: | |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
224ce89b WB |
30 | default rel |
31 | ||
32 | %include "reg_sizes.asm" | |
33 | ||
;; Status values. The exit paths of the decode routine in this file load
;; END_INPUT, OUT_OVERFLOW, INVALID_SYMBOL and INVALID_LOOKBACK into rax;
;; its end-of-block path zeroes rax, which equals DECOMP_OK.
;; INVALID_BLOCK is not referenced in the part of the file shown here.
34 | %define DECOMP_OK 0 | |
35 | %define END_INPUT 1 | |
36 | %define OUT_OVERFLOW 2 | |
37 | %define INVALID_BLOCK -1 | |
38 | %define INVALID_SYMBOL -2 | |
39 | %define INVALID_LOOKBACK -3 | |
40 | ||
;; Number of low input bits used to index the first-level lookup:
;; LONG_BITS indexes _lit_huff_code, SHORT_BITS indexes _dist_huff_code.
41 | %define ISAL_DECODE_LONG_BITS 12 | |
42 | %define ISAL_DECODE_SHORT_BITS 10 | |
43 | ||
224ce89b WB |
;; COPY_SIZE is the number of bytes each MOVDQU copy step advances by in the
;; fast loop. COPY_LEN_MAX is presumably the longest match length the format
;; allows (TODO confirm against the DEFLATE specification).
44 | %define COPY_SIZE 16 |
45 | %define COPY_LEN_MAX 258 | |
46 | ||
;; Margins subtracted from end_in / end_out while the fast loop runs, so that
;; its 8-byte input loads and COPY_SIZE-wide stores need no per-access check.
;; OUT_BUFFER_SLOP is parenthesised so it expands to a single value wherever
;; it is used (e.g. inside a product); its numeric value is unchanged.
47 | %define IN_BUFFER_SLOP 8 | |
48 | %define OUT_BUFFER_SLOP (COPY_SIZE + COPY_LEN_MAX) | |
49 | ||
50 | %include "inflate_data_structs.asm" | |
51 | %include "stdmac.asm" | |
52 | ||
53 | extern rfc1951_lookup_table | |
54 | ||
f91f0fd5 TL |
55 | |
56 | ||
;; Field layout of lit/len lookup entries, as decoded by decode_next_lit_len.
;; First-level ("short") entry, read as a dword:
;;   bits 0..24  packed symbol bytes, or a second-level index when the flag is set
;;   bit  25     flag: set means the entry is only a hint into the second level
;;   bits 26..27 number of symbols packed in the entry
;;   bits 28+    code length; zero is treated as an invalid symbol
;; For flagged entries the decoder recombines bits 26 and up into one bit count
;; (the position named by LARGE_SHORT_MAX_LEN_OFFSET).
;; Second-level ("long") entry, read as a word:
;;   bits 0..9   symbol
;;   bits 10+    code length; zero is treated as an invalid symbol
57 | %define LARGE_SHORT_SYM_LEN 25 | |
58 | %define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1) | |
59 | %define LARGE_LONG_SYM_LEN 10 | |
60 | %define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1) | |
61 | %define LARGE_SHORT_CODE_LEN_OFFSET 28 | |
62 | %define LARGE_LONG_CODE_LEN_OFFSET 10 | |
63 | %define LARGE_FLAG_BIT_OFFSET 25 | |
64 | %define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET) | |
65 | %define LARGE_SYM_COUNT_OFFSET 26 | |
66 | %define LARGE_SYM_COUNT_LEN 2 | |
67 | %define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1) | |
68 | %define LARGE_SHORT_MAX_LEN_OFFSET 26 | |
69 | ||
;; Field layout of distance lookup entries, as decoded by decode_next_dist.
;; Both levels are read as words.
;;   first level:  bits 0..8 symbol or hint, bit 10 flag, bits 11+ code length
;;   second level: symbol masked with the 9-bit symbol mask, bits 10+ code length
70 | %define SMALL_SHORT_SYM_LEN 9 | |
71 | %define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1) | |
72 | %define SMALL_LONG_SYM_LEN 9 | |
73 | %define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1) | |
74 | %define SMALL_SHORT_CODE_LEN_OFFSET 11 | |
75 | %define SMALL_LONG_CODE_LEN_OFFSET 10 | |
76 | %define SMALL_FLAG_BIT_OFFSET 10 | |
77 | %define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET) | |
78 | ||
;; Split of a decoded distance value: the low DIST_SYM_LEN bits are the
;; distance symbol; decode_next_dist returns the bits from
;; DIST_SYM_EXTRA_OFFSET upward in rcx as the extra-bit count.
79 | %define DIST_SYM_OFFSET 0 | |
80 | %define DIST_SYM_LEN 5 | |
81 | %define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1) | |
82 | %define DIST_SYM_EXTRA_OFFSET 5 | |
83 | %define DIST_SYM_EXTRA_LEN 4 | |
84 | %define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1) | |
85 | ||
224ce89b WB |
;; Symbolic register names used by the decode routine.
;; Several names map to the same physical register; such names are
;; alternative views of one register and cannot hold independent values
;; at the same time. rcx has no alias: it is used directly as the shift
;; count / bit-count scratch register by the macros below.
86 | ;; rax |
87 | %define tmp3 rax | |
88 | %define read_in_2 rax | |
89 | %define look_back_dist rax | |
90 | ||
91 | ;; rcx | |
92 | ;; rdx arg3 | |
93 | %define next_sym2 rdx | |
94 | %define copy_start rdx | |
95 | %define tmp4 rdx | |
96 | ||
97 | ;; rdi arg1 | |
98 | %define tmp1 rdi | |
99 | %define look_back_dist2 rdi | |
100 | %define next_bits2 rdi | |
101 | %define next_sym3 rdi | |
102 | ||
103 | ;; rsi arg2 | |
104 | %define tmp2 rsi | |
f91f0fd5 | 105 | %define next_sym_num rsi |
224ce89b WB |
106 | %define next_bits rsi |
107 | ||
108 | ;; rbx ; Saved | |
109 | %define next_in rbx | |
110 | ||
111 | ;; rbp ; Saved | |
112 | %define end_in rbp | |
113 | ||
114 | ;; r8 | |
115 | %define repeat_length r8 | |
116 | ||
117 | ;; r9 | |
118 | %define read_in r9 | |
119 | ||
120 | ;; r10 | |
121 | %define read_in_length r10 | |
122 | ||
123 | ;; r11 | |
124 | %define state r11 | |
125 | ||
126 | ;; r12 ; Saved | |
127 | %define next_out r12 | |
128 | ||
129 | ;; r13 ; Saved | |
130 | %define end_out r13 | |
131 | ||
132 | ;; r14 ; Saved | |
133 | %define next_sym r14 | |
134 | ||
135 | ;; r15 ; Saved | |
136 | %define rfc_lookup r15 | |
137 | ||
;; Stack frame layout, as byte offsets from rsp after FUNC_SAVE:
;;   0   copy of arg1, used as the lower bound for look-back copy sources
;;   8   read_in snapshot      (restored on the end_of_input path)
;;   16  read_in_length snapshot
;;   24  next_out snapshot
;;   32  start of the saved general-purpose register area
;; stack_size reserves the 4 local qwords plus 8 register slots (96 bytes);
;; the elf64 FUNC_SAVE uses 6 of the register slots, the win64 one uses all 8.
138 | start_out_mem_offset equ 0 | |
139 | read_in_mem_offset equ 8 | |
140 | read_in_length_mem_offset equ 16 | |
f91f0fd5 TL |
141 | next_out_mem_offset equ 24 |
142 | gpr_save_mem_offset equ 32 | |
143 | stack_size equ 4 * 8 + 8 * 8 | |
224ce89b WB |
144 | |
;; Byte offsets of fields inside rfc1951_lookup_table; the decode routine
;; addresses the table as [rfc_lookup + _dist_start + 4 * symbol].
;; NOTE(review): the offsets presumably mirror a structure declared outside
;; this file - confirm before changing any of them.
;; Each derived offset is parenthesised so it expands to a single value
;; wherever it is used; the numeric values (264, 296, 424, 456) are unchanged.
145 | %define _dist_extra_bit_count 264 | |
146 | %define _dist_start (_dist_extra_bit_count + 1*32) | |
147 | %define _len_extra_bit_count (_dist_start + 4*32) | |
148 | %define _len_start (_len_extra_bit_count + 1*32) | |
149 | ||
;; elf64 build: argument registers and register save/restore macros.
150 | %ifidn __OUTPUT_FORMAT__, elf64 | |
151 | %define arg0 rdi | |
f91f0fd5 | 152 | %define arg1 rsi |
224ce89b WB |
153 | |
;; FUNC_SAVE: reserve stack_size bytes and store rbx, rbp, r12-r15 in the
;; register save area. With ALIGN_STACK defined, rbp is pushed first and
;; used as a frame pointer, and rsp is rounded down to a 16-byte boundary;
;; in that case the rbp value stored in slot 1 is the frame pointer, not
;; the caller's rbp (the caller's value is the one that was pushed).
154 | %macro FUNC_SAVE 0 | |
155 | %ifdef ALIGN_STACK | |
156 | push rbp | |
157 | mov rbp, rsp | |
158 | sub rsp, stack_size | |
159 | and rsp, ~15 | |
160 | %else | |
161 | sub rsp, stack_size | |
162 | %endif | |
163 | ||
164 | mov [rsp + gpr_save_mem_offset + 0*8], rbx | |
165 | mov [rsp + gpr_save_mem_offset + 1*8], rbp | |
166 | mov [rsp + gpr_save_mem_offset + 2*8], r12 | |
167 | mov [rsp + gpr_save_mem_offset + 3*8], r13 | |
168 | mov [rsp + gpr_save_mem_offset + 4*8], r14 | |
169 | mov [rsp + gpr_save_mem_offset + 5*8], r15 | |
170 | %endm | |
171 | ||
;; FUNC_RESTORE: reload the registers stored by FUNC_SAVE and release the
;; frame. rsp must hold the same value it had right after FUNC_SAVE.
;; With ALIGN_STACK, the reloaded rbp (the frame pointer) is used to
;; recover rsp before the caller's rbp is popped.
172 | %macro FUNC_RESTORE 0 | |
173 | mov rbx, [rsp + gpr_save_mem_offset + 0*8] | |
174 | mov rbp, [rsp + gpr_save_mem_offset + 1*8] | |
175 | mov r12, [rsp + gpr_save_mem_offset + 2*8] | |
176 | mov r13, [rsp + gpr_save_mem_offset + 3*8] | |
177 | mov r14, [rsp + gpr_save_mem_offset + 4*8] | |
178 | mov r15, [rsp + gpr_save_mem_offset + 5*8] | |
179 | ||
180 | %ifndef ALIGN_STACK | |
181 | add rsp, stack_size | |
182 | %else | |
183 | mov rsp, rbp | |
184 | pop rbp | |
185 | %endif | |
186 | %endm | |
187 | %endif | |
188 | ||
;; win64 build: argument registers and register save/restore macros.
;; Unlike the elf64 variant, rsi and rdi are also saved and restored here,
;; so all 8 slots of the register save area are used.
189 | %ifidn __OUTPUT_FORMAT__, win64 | |
190 | %define arg0 rcx | |
f91f0fd5 TL |
191 | %define arg1 rdx |
192 | ||
224ce89b WB |
;; FUNC_SAVE: reserve stack_size bytes and store rbx, rsi, rdi, rbp, r12-r15.
;; With ALIGN_STACK defined, rbp is pushed first and used as a frame
;; pointer, and rsp is rounded down to a 16-byte boundary; the rbp value
;; stored in slot 3 is then the frame pointer, not the caller's rbp.
193 | %macro FUNC_SAVE 0 |
194 | %ifdef ALIGN_STACK | |
195 | push rbp | |
196 | mov rbp, rsp | |
197 | sub rsp, stack_size | |
198 | and rsp, ~15 | |
199 | %else | |
200 | sub rsp, stack_size | |
201 | %endif | |
202 | ||
203 | mov [rsp + gpr_save_mem_offset + 0*8], rbx | |
204 | mov [rsp + gpr_save_mem_offset + 1*8], rsi | |
205 | mov [rsp + gpr_save_mem_offset + 2*8], rdi | |
206 | mov [rsp + gpr_save_mem_offset + 3*8], rbp | |
207 | mov [rsp + gpr_save_mem_offset + 4*8], r12 | |
208 | mov [rsp + gpr_save_mem_offset + 5*8], r13 | |
209 | mov [rsp + gpr_save_mem_offset + 6*8], r14 | |
210 | mov [rsp + gpr_save_mem_offset + 7*8], r15 | |
211 | %endm | |
212 | ||
;; FUNC_RESTORE: reload the registers stored by FUNC_SAVE and release the
;; frame. rsp must hold the same value it had right after FUNC_SAVE.
213 | %macro FUNC_RESTORE 0 | |
214 | mov rbx, [rsp + gpr_save_mem_offset + 0*8] | |
215 | mov rsi, [rsp + gpr_save_mem_offset + 1*8] | |
216 | mov rdi, [rsp + gpr_save_mem_offset + 2*8] | |
217 | mov rbp, [rsp + gpr_save_mem_offset + 3*8] | |
218 | mov r12, [rsp + gpr_save_mem_offset + 4*8] | |
219 | mov r13, [rsp + gpr_save_mem_offset + 5*8] | |
220 | mov r14, [rsp + gpr_save_mem_offset + 6*8] | |
221 | mov r15, [rsp + gpr_save_mem_offset + 7*8] | |
222 | ||
223 | %ifndef ALIGN_STACK | |
224 | add rsp, stack_size | |
225 | %else | |
226 | mov rsp, rbp | |
227 | pop rbp | |
228 | %endif | |
229 | %endm | |
230 | %endif | |
231 | ||
232 | ;; Load read_in and update in_buffer accordingly | |
233 | ;; when there are at least 8 bytes in the in buffer | |
234 | ;; Clobbers rcx, unless rcx is %%read_in_length | |
;; NOTE(review): the macro body never names rcx; the clobber above can only
;; come from the SHLX macro, which is defined outside this file - confirm.
;; Effect: ORs the qword at [%%next_in], shifted left by %%read_in_length,
;; into %%read_in; then advances %%next_in by (64 - %%read_in_length) / 8
;; whole bytes and adds 8 times that count to %%read_in_length.
;; %%end_in and %%tmp2 are accepted but not referenced, and the %%end label
;; is not a jump target inside this macro. The one invocation in this file
;; is skipped when read_in_length is already 64.
235 | %macro inflate_in_load 6 | |
236 | %define %%next_in %1 | |
237 | %define %%end_in %2 | |
238 | %define %%read_in %3 | |
239 | %define %%read_in_length %4 | |
240 | %define %%tmp1 %5 ; Tmp registers | |
241 | %define %%tmp2 %6 | |
242 | ||
243 | SHLX %%tmp1, [%%next_in], %%read_in_length | |
244 | or %%read_in, %%tmp1 | |
245 | ||
;; %%tmp1 = number of whole bytes that fit in the free part of read_in
246 | mov %%tmp1, 64 | |
247 | sub %%tmp1, %%read_in_length | |
248 | shr %%tmp1, 3 | |
249 | ||
250 | add %%next_in, %%tmp1 | |
251 | lea %%read_in_length, [%%read_in_length + 8 * %%tmp1] | |
252 | %%end: | |
253 | %endm | |
254 | ||
255 | ;; Load read_in and update in_buffer accordingly | |
256 | ;; Clobbers rcx, unless rcx is %%read_in_length | |
;; Byte-at-a-time refill that never reads at or beyond %%end_in.
;; The number of bytes loaded is the smaller (signed compare) of the bytes
;; left before %%end_in and the whole bytes that still fit in %%read_in.
;; rcx carries the running bit count during the loop and is copied back to
;; %%read_in_length afterwards. %5 is used both as the available-byte
;; count and, inside the loop, as the byte scratch register.
257 | %macro inflate_in_small_load 6 | |
258 | %define %%next_in %1 | |
259 | %define %%end_in %2 | |
260 | %define %%read_in %3 | |
261 | %define %%read_in_length %4 | |
262 | %define %%avail_in %5 ; Tmp registers | |
263 | %define %%tmp1 %5 | |
264 | %define %%loop_count %6 | |
265 | ||
;; avail_in = end_in - next_in
266 | mov %%avail_in, %%end_in | |
267 | sub %%avail_in, %%next_in | |
268 | ||
269 | %ifnidn %%read_in_length, rcx | |
270 | mov rcx, %%read_in_length | |
271 | %endif | |
272 | ||
;; loop_count = min((64 - read_in_length) / 8, avail_in); nothing to do if 0
273 | mov %%loop_count, 64 | |
274 | sub %%loop_count, %%read_in_length | |
275 | shr %%loop_count, 3 | |
276 | ||
277 | cmp %%loop_count, %%avail_in | |
278 | cmovg %%loop_count, %%avail_in | |
279 | cmp %%loop_count, 0 | |
280 | je %%end | |
281 | ||
;; Each pass places one input byte at bit position rcx of read_in
282 | %%load_byte: | |
283 | xor %%tmp1, %%tmp1 | |
284 | mov %%tmp1 %+ b, byte [%%next_in] | |
285 | SHLX %%tmp1, %%tmp1, rcx | |
286 | or %%read_in, %%tmp1 | |
287 | add rcx, 8 | |
288 | add %%next_in, 1 | |
289 | sub %%loop_count, 1 | |
290 | jg %%load_byte | |
291 | %ifnidn %%read_in_length, rcx | |
292 | mov %%read_in_length, rcx | |
293 | %endif | |
294 | %%end: | |
295 | %endm | |
296 | ||
f91f0fd5 TL |
297 | ;; Keeps the low (%%bit_count - %%lookup_size) bits of %%next_bits and clears the rest |
298 | ;; May clobber rcx and %%bit_count | |
;; How both paths arrive at that result:
;;   %%bit_count is first reduced by 64 + %%lookup_size.
;;   USE_HSWNI: the low 5 bits of that value equal
;;     (bit_count - lookup_size) mod 32, used as the bzhi index.
;;   otherwise: the value is negated into rcx; shifts by cl use only the low
;;     6 bits of cl, i.e. 64 - (bit_count - lookup_size), so the shl/shr pair
;;     discards everything above the wanted low bits.
;; The callers in this file always pass rcx as %%bit_count.
299 | %macro CLEAR_HIGH_BITS 3 | |
300 | %define %%next_bits %1 | |
301 | %define %%bit_count %2 | |
302 | %define %%lookup_size %3 | |
303 | ||
304 | sub %%bit_count, 0x40 + %%lookup_size | |
305 | ;; Extract the bits beyond the first %%lookup_size bits. | |
306 | %ifdef USE_HSWNI | |
307 | and %%bit_count, 0x1F | |
308 | bzhi %%next_bits, %%next_bits, %%bit_count | |
309 | %else | |
310 | %ifnidn %%bit_count, rcx | |
311 | mov rcx, %%bit_count | |
312 | %endif | |
313 | neg rcx | |
314 | shl %%next_bits, cl | |
315 | shr %%next_bits, cl | |
316 | %endif | |
317 | ||
318 | %endm | |
319 | ||
224ce89b WB |
320 | ;; Decode next symbol |
321 | ;; Clobber rcx | |
;; Precondition: %%next_sym already holds the first-level lit/len entry for
;; the low %%lookup_size bits of %%read_in (this macro does not load it).
;; On exit:
;;   %%next_sym      decoded symbol(s); a first-level entry may pack several
;;   %%next_sym_num  number of symbols returned (1 after a second-level lookup)
;;   rcx             number of input bits consumed
;;   %%read_in / %%read_in_length  advanced past the consumed bits
;; Jumps to the file-level label invalid_symbol when an entry has a zero
;; code-length field.
f91f0fd5 | 322 | %macro decode_next_lit_len 8 |
224ce89b WB |
323 | %define %%state %1 ; State structure associated with compressed stream |
324 | %define %%lookup_size %2 ; Number of bits used for small lookup | |
f91f0fd5 | 325 | %define %%state_offset %3 ; Type of huff code, should be either LIT or DIST |
224ce89b WB |
326 | %define %%read_in %4 ; Bits read in from compressed stream |
327 | %define %%read_in_length %5 ; Number of valid bits in read_in | |
f91f0fd5 TL |
328 | %define %%next_sym %6 ; Returned symbols |
329 | %define %%next_sym_num %7 ; Returned symbols count | |
330 | %define %%next_bits %8 | |
224ce89b | 331 | |
;; rcx = code length field of the first-level entry (0 means invalid)
f91f0fd5 | 332 | mov %%next_sym_num, %%next_sym |
224ce89b | 333 | mov rcx, %%next_sym |
f91f0fd5 | 334 | shr rcx, LARGE_SHORT_CODE_LEN_OFFSET |
224ce89b WB |
335 | jz invalid_symbol |
336 | ||
;; next_sym_num = symbol count field of the entry
f91f0fd5 TL |
337 | and %%next_sym_num, LARGE_SYM_COUNT_MASK << LARGE_SYM_COUNT_OFFSET |
338 | shr %%next_sym_num, LARGE_SYM_COUNT_OFFSET | |
339 | ||
224ce89b | 340 | ;; Check if symbol or hint was looked up |
f91f0fd5 TL |
341 | and %%next_sym, LARGE_FLAG_BIT | LARGE_SHORT_SYM_MASK |
342 | test %%next_sym, LARGE_FLAG_BIT | |
343 | jz %%end | |
224ce89b | 344 | |
;; Hint entry: rebuild all entry bits from LARGE_SYM_COUNT_OFFSET upward in
;; rcx; that value is the bit count handed to CLEAR_HIGH_BITS below
f91f0fd5 TL |
345 | shl rcx, LARGE_SYM_COUNT_LEN |
346 | or rcx, %%next_sym_num | |
224ce89b | 347 | |
f91f0fd5 TL |
348 | ;; Take the input bits that follow the first %%lookup_size bits |
349 | mov %%next_bits, %%read_in | |
350 | shr %%next_bits, %%lookup_size | |
351 | ||
352 | ;; Extract the bits beyond the first %%lookup_size bits. | |
353 | CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size | |
224ce89b | 354 | |
;; Second-level index = hint value from the entry + the extracted extra bits
f91f0fd5 TL |
355 | and %%next_sym, LARGE_SHORT_SYM_MASK |
356 | add %%next_sym, %%next_bits | |
224ce89b WB |
357 | |
358 | ;; Lookup actual next symbol | |
f91f0fd5 TL |
359 | movzx %%next_sym, word [%%state + LARGE_LONG_CODE_SIZE * %%next_sym + %%state_offset + LARGE_SHORT_CODE_SIZE * (1 << %%lookup_size)] |
360 | mov %%next_sym_num, 1 | |
224ce89b WB |
361 | |
362 | ;; Save length associated with symbol | |
363 | mov rcx, %%next_sym | |
f91f0fd5 | 364 | shr rcx, LARGE_LONG_CODE_LEN_OFFSET |
224ce89b | 365 | jz invalid_symbol |
f91f0fd5 TL |
366 | and %%next_sym, LARGE_LONG_SYM_MASK |
367 | ||
224ce89b | 368 | %%end: |
f91f0fd5 | 369 | ;; Updated read_in to reflect the bits which were decoded |
224ce89b | 370 | SHRX %%read_in, %%read_in, rcx |
f91f0fd5 | 371 | sub %%read_in_length, rcx |
224ce89b WB |
372 | %endm |
373 | ||
f91f0fd5 TL |
374 | ;; Decode next symbol |
375 | ;; Clobber rcx | |
;; Same as decode_next_lit_len, but first loads the first-level entry for the
;; low %%lookup_size bits of %%read_in into %%next_sym. %%next_bits is used
;; as scratch for the table index.
376 | %macro decode_next_lit_len_with_load 8 | |
377 | %define %%state %1 ; State structure associated with compressed stream | |
378 | %define %%lookup_size %2 ; Number of bits used for small lookup | |
379 | %define %%state_offset %3 | |
380 | %define %%read_in %4 ; Bits read in from compressed stream | |
381 | %define %%read_in_length %5 ; Number of valid bits in read_in | |
382 | %define %%next_sym %6 ; Returned symbols | |
383 | %define %%next_sym_num %7 ; Returned symbols count | |
384 | %define %%next_bits %8 | |
385 | ||
386 | ;; Lookup possible next symbol | |
387 | mov %%next_bits, %%read_in | |
388 | and %%next_bits, (1 << %%lookup_size) - 1 | |
389 | mov %%next_sym %+ d, dword [%%state + %%state_offset + LARGE_SHORT_CODE_SIZE * %%next_bits] | |
390 | ||
391 | decode_next_lit_len %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_sym_num, %%next_bits | |
392 | %endm | |
224ce89b WB |
393 | |
394 | ;; Decode next symbol | |
395 | ;; Clobber rcx | |
;; Precondition: %%next_sym already holds the first-level distance entry for
;; the low %%lookup_size bits of %%read_in (this macro does not load it).
;; On exit:
;;   %%next_sym  distance symbol (low DIST_SYM_LEN bits of the decoded value)
;;   rcx         decoded value shifted right by DIST_SYM_EXTRA_OFFSET, i.e.
;;               the extra-bit count for the symbol
;;   %%read_in / %%read_in_length  advanced past the code bits only; the
;;               extra bits are left for the caller to consume
;; %%next_extra_bits is accepted but never referenced; the extra-bit count is
;; always returned in rcx.
;; An entry with a zero code-length field jumps to a label whose name is built
;; from the register passed as %%next_sym (invalid_dist_symbol_<register>), so
;; a matching label must exist for every register this macro is used with.
f91f0fd5 | 396 | %macro decode_next_dist 8 |
224ce89b WB |
397 | %define %%state %1 ; State structure associated with compressed stream |
398 | %define %%lookup_size %2 ; Number of bits used for small lookup | |
399 | %define %%state_offset %3 ; Type of huff code, should be either LIT or DIST | |
400 | %define %%read_in %4 ; Bits read in from compressed stream | |
401 | %define %%read_in_length %5 ; Number of valid bits in read_in | |
402 | %define %%next_sym %6 ; Returned symbol | |
f91f0fd5 TL |
403 | %define %%next_extra_bits %7 |
404 | %define %%next_bits %8 | |
224ce89b WB |
405 | |
;; rcx = code length field of the first-level entry (0 means invalid)
406 | mov rcx, %%next_sym | |
f91f0fd5 TL |
407 | shr rcx, SMALL_SHORT_CODE_LEN_OFFSET |
408 | jz invalid_dist_symbol_ %+ %%next_sym | |
224ce89b WB |
409 | |
410 | ;; Check if symbol or hint was looked up | |
f91f0fd5 TL |
411 | and %%next_sym, SMALL_FLAG_BIT | SMALL_SHORT_SYM_MASK |
412 | test %%next_sym, SMALL_FLAG_BIT | |
413 | jz %%end | |
414 | ||
415 | ;; Take the input bits that follow the first %%lookup_size bits | |
416 | mov %%next_bits, %%read_in | |
417 | shr %%next_bits, %%lookup_size | |
224ce89b WB |
418 | |
419 | ;; Start the second-level address; the flag bit is still set in %%next_sym here | |
f91f0fd5 | 420 | lea %%next_sym, [%%state + SMALL_LONG_CODE_SIZE * %%next_sym] |
224ce89b | 421 | |
f91f0fd5 | 422 | CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size |
224ce89b WB |
423 | |
424 | ;; Lookup actual next symbol | |
;; The trailing "- SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT" term cancels the
;; flag bit that was scaled into the address by the lea above
f91f0fd5 | 425 | movzx %%next_sym, word [%%next_sym + %%state_offset + SMALL_LONG_CODE_SIZE * %%next_bits + SMALL_SHORT_CODE_SIZE * (1 << %%lookup_size) - SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT] |
224ce89b WB |
426 | |
427 | ;; Save length associated with symbol | |
428 | mov rcx, %%next_sym | |
f91f0fd5 TL |
429 | shr rcx, SMALL_LONG_CODE_LEN_OFFSET |
430 | jz invalid_dist_symbol_ %+ %%next_sym | |
431 | and %%next_sym, SMALL_SHORT_SYM_MASK | |
224ce89b WB |
432 | |
433 | %%end: | |
434 | ;; Updated read_in to reflect the bits which were decoded | |
435 | SHRX %%read_in, %%read_in, rcx | |
436 | sub %%read_in_length, rcx | |
;; Split the decoded value: rcx = extra-bit count, next_sym = distance symbol
f91f0fd5 TL |
437 | mov rcx, %%next_sym |
438 | shr rcx, DIST_SYM_EXTRA_OFFSET | |
439 | and %%next_sym, DIST_SYM_MASK | |
440 | %endm | |
441 | ||
442 | ;; Decode next symbol | |
443 | ;; Clobber rcx | |
;; Same as decode_next_dist, but first loads the first-level distance entry
;; for the low %%lookup_size bits of %%read_in into %%next_sym. %%next_bits
;; is used as scratch for the table index.
444 | %macro decode_next_dist_with_load 8 | |
445 | %define %%state %1 ; State structure associated with compressed stream | |
446 | %define %%lookup_size %2 ; Number of bits used for small lookup | |
447 | %define %%state_offset %3 | |
448 | %define %%read_in %4 ; Bits read in from compressed stream | |
449 | %define %%read_in_length %5 ; Number of valid bits in read_in | |
450 | %define %%next_sym %6 ; Returned symbol | |
451 | %define %%next_extra_bits %7 | |
452 | %define %%next_bits %8 | |
453 | ||
454 | ;; Lookup possible next symbol | |
455 | mov %%next_bits, %%read_in | |
456 | and %%next_bits, (1 << %%lookup_size) - 1 | |
457 | movzx %%next_sym, word [%%state + %%state_offset + SMALL_SHORT_CODE_SIZE * %%next_bits] | |
458 | ||
459 | decode_next_dist %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_extra_bits, %%next_bits | |
224ce89b WB |
460 | %endm |
461 | ||
;;-----------------------------------------------------------------------
;; decode_huffman_code_block_stateless_<ARCH>
;; (ARCH is supplied from outside this file.)
;; In:   arg0 = pointer to the inflate state structure
;;       arg1 = lowest address a look-back copy may read from; a copy whose
;;              source is below it ends with INVALID_LOOKBACK
;; Out:  rax  = 0 after an end-of-block symbol, else END_INPUT, OUT_OVERFLOW,
;;              INVALID_SYMBOL or INVALID_LOOKBACK
;;       state fields written at `end`: _read_in, _read_in_length, _avail_out,
;;       _total_out, _next_out, _next_in, _avail_in
;; Structure: a fast loop that runs while at least IN_BUFFER_SLOP input bytes
;; and OUT_BUFFER_SLOP output bytes remain (so it can load and store without
;; per-access checks), followed by a bounds-checked slow path that handles
;; the tail of either buffer.
;;-----------------------------------------------------------------------
462 | global decode_huffman_code_block_stateless_ %+ ARCH | |
463 | decode_huffman_code_block_stateless_ %+ ARCH %+ : | |
464 | ||
465 | FUNC_SAVE | |
466 | ||
467 | mov state, arg0 | |
f91f0fd5 | 468 | mov [rsp + start_out_mem_offset], arg1 |
224ce89b WB |
469 | lea rfc_lookup, [rfc1951_lookup_table] |
470 | ||
;; Load the bit buffer and compute one-past-end pointers for both buffers
471 | mov read_in,[state + _read_in] | |
472 | mov read_in_length %+ d, dword [state + _read_in_length] | |
473 | mov next_out, [state + _next_out] | |
474 | mov end_out %+ d, dword [state + _avail_out] | |
475 | add end_out, next_out | |
476 | mov next_in, [state + _next_in] | |
477 | mov end_in %+ d, dword [state + _avail_in] | |
478 | add end_in, next_in | |
479 | ||
480 | mov dword [state + _copy_overflow_len], 0 | |
481 | mov dword [state + _copy_overflow_dist], 0 | |
482 | ||
224ce89b WB |
;; Pull both limits in by the slop margins for the duration of the fast loop
483 | sub end_out, OUT_BUFFER_SLOP |
484 | sub end_in, IN_BUFFER_SLOP | |
485 | ||
486 | cmp next_in, end_in | |
487 | jg end_loop_block_pre | |
488 | ||
;; A full bit buffer needs no refill
489 | cmp read_in_length, 64 | |
490 | je skip_load | |
491 | ||
492 | inflate_in_load next_in, end_in, read_in, read_in_length, tmp1, tmp2 | |
493 | ||
494 | skip_load: | |
;; Preload the first-level lit/len entry expected by decode_next_lit_len
495 | mov tmp3, read_in | |
496 | and tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1 | |
f91f0fd5 | 497 | mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3] |
224ce89b WB |
498 | |
499 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
500 | ; Main Loop | |
501 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
502 | loop_block: | |
503 | ;; Check if near end of in buffer or out buffer | |
504 | cmp next_in, end_in | |
505 | jg end_loop_block_pre | |
506 | cmp next_out, end_out | |
507 | jg end_loop_block_pre | |
508 | ||
509 | ;; Decode next symbol and reload the read_in buffer | |
f91f0fd5 | 510 | decode_next_lit_len state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1 |
224ce89b | 511 | |
f91f0fd5 TL |
512 | ;; Speculatively write next_sym if it is a literal |
;; All 8 bytes of next_sym are stored; next_out advances by the symbol count
;; only. next_sym2 = the last symbol of the packed group.
513 | mov [next_out], next_sym | |
514 | add next_out, next_sym_num | |
515 | lea next_sym2, [8 * next_sym_num - 8] | |
516 | SHRX next_sym2, next_sym, next_sym2 | |
224ce89b WB |
517 | |
518 | ;; Find index to speculatively preload next_sym from | |
f91f0fd5 TL |
519 | mov tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1 |
520 | and tmp3, read_in | |
224ce89b WB |
521 | |
522 | ;; Start reloading read_in | |
523 | mov tmp1, [next_in] | |
524 | SHLX tmp1, tmp1, read_in_length | |
525 | or read_in, tmp1 | |
526 | ||
527 | ;; Speculatively load data associated with length symbol | |
f91f0fd5 | 528 | lea repeat_length, [next_sym2 - 254] |
224ce89b WB |
529 | |
530 | ;; Test for end of block symbol | |
531 | cmp next_sym2, 256 | |
532 | je end_symbol_pre | |
533 | ||
534 | ;; Speculatively load next_sym for next loop if a literal was decoded | |
f91f0fd5 | 535 | mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3] |
224ce89b WB |
536 | |
537 | ;; Finish updating read_in_length for read_in | |
538 | mov tmp1, 64 | |
539 | sub tmp1, read_in_length | |
540 | shr tmp1, 3 | |
541 | add next_in, tmp1 | |
542 | lea read_in_length, [read_in_length + 8 * tmp1] | |
543 | ||
544 | ;; Speculatively load next dist code | |
f91f0fd5 TL |
545 | mov next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1 |
546 | and next_bits2, read_in | |
547 | movzx next_sym3, word [state + _dist_huff_code + SMALL_SHORT_CODE_SIZE * next_bits2] | |
224ce89b WB |
548 | |
549 | ;; Check if next_sym2 is a literal, length, or end of block symbol | |
550 | cmp next_sym2, 256 | |
551 | jl loop_block | |
552 | ||
553 | decode_len_dist: | |
f91f0fd5 TL |
554 | ;; Determine next_out after the copy is finished |
;; The -1 takes back the position that the speculative store above counted
;; for the length symbol itself
555 | lea next_out, [next_out + repeat_length - 1] | |
224ce89b WB |
556 | |
557 | ;; Decode distance code | |
f91f0fd5 | 558 | decode_next_dist state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym3, rcx, tmp2 |
224ce89b | 559 | |
;; look_back_dist2 = table value for the distance symbol (extra bits added below)
224ce89b WB |
560 | mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3] |
561 | ||
f91f0fd5 TL |
562 | ; ;; Load distance code extra bits |
563 | mov next_bits, read_in | |
224ce89b WB |
564 | |
565 | ;; Calculate the look back distance | |
;; rcx = extra-bit count returned by decode_next_dist
566 | BZHI next_bits, next_bits, rcx, tmp4 | |
f91f0fd5 | 567 | SHRX read_in, read_in, rcx |
224ce89b WB |
568 | |
569 | ;; Setup next_sym, read_in, and read_in_length for next loop | |
f91f0fd5 TL |
570 | mov read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1 |
571 | and read_in_2, read_in | |
572 | mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * read_in_2] | |
224ce89b WB |
573 | sub read_in_length, rcx |
574 | ||
575 | ;; Copy distance in len/dist pair | |
576 | add look_back_dist2, next_bits | |
577 | ||
578 | ;; Find beginning of copy | |
;; copy_start is the source address; the destination is
;; copy_start + look_back_dist2
579 | mov copy_start, next_out | |
580 | sub copy_start, repeat_length | |
581 | sub copy_start, look_back_dist2 | |
582 | ||
583 | ;; Check if a valid look back distance was decoded | |
584 | cmp copy_start, [rsp + start_out_mem_offset] | |
585 | jl invalid_look_back_distance | |
586 | MOVDQU xmm1, [copy_start] | |
587 | ||
588 | ;; Set tmp2 to be the minimum of COPY_SIZE and repeat_length | |
589 | ;; This is to decrease use of small_byte_copy branch | |
590 | mov tmp2, COPY_SIZE | |
591 | cmp tmp2, repeat_length | |
592 | cmovg tmp2, repeat_length | |
593 | ||
594 | ;; Check for overlapping memory in the copy | |
595 | cmp look_back_dist2, tmp2 | |
596 | jl small_byte_copy_pre | |
597 | ||
598 | large_byte_copy: | |
599 | ;; Copy length distance pair when memory overlap is not an issue | |
;; Moves COPY_SIZE bytes per pass; the last pass may store past the end of
;; the match, which OUT_BUFFER_SLOP leaves room for
600 | MOVDQU [copy_start + look_back_dist2], xmm1 | |
601 | ||
602 | sub repeat_length, COPY_SIZE | |
603 | jle loop_block | |
604 | ||
605 | add copy_start, COPY_SIZE | |
606 | MOVDQU xmm1, [copy_start] | |
607 | jmp large_byte_copy | |
608 | ||
609 | small_byte_copy_pre: | |
610 | ;; Copy length distance pair when source and destination overlap | |
611 | add repeat_length, look_back_dist2 | |
612 | small_byte_copy: | |
;; Each pass stores 16 bytes at the current distance, then doubles the
;; distance, until the distance is at least COPY_SIZE
613 | MOVDQU [copy_start + look_back_dist2], xmm1 | |
614 | ||
615 | shl look_back_dist2, 1 | |
616 | MOVDQU xmm1, [copy_start] | |
617 | cmp look_back_dist2, COPY_SIZE | |
618 | jl small_byte_copy | |
619 | ||
620 | sub repeat_length, look_back_dist2 | |
621 | jge large_byte_copy | |
622 | jmp loop_block | |
623 | ||
624 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
625 | ; Finish Main Loop | |
626 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
627 | end_loop_block_pre: | |
628 | ;; Fix up in buffer and out buffer to reflect the actual buffer end | |
629 | add end_out, OUT_BUFFER_SLOP | |
630 | add end_in, IN_BUFFER_SLOP | |
631 | ||
632 | end_loop_block: | |
633 | ;; Load read in buffer and decode next lit/len symbol | |
;; read_in, read_in_length and next_out are snapshotted first so that
;; end_of_input can roll back a symbol that could not be fully decoded
634 | inflate_in_small_load next_in, end_in, read_in, read_in_length, tmp1, tmp2 | |
635 | mov [rsp + read_in_mem_offset], read_in | |
636 | mov [rsp + read_in_length_mem_offset], read_in_length | |
f91f0fd5 | 637 | mov [rsp + next_out_mem_offset], next_out |
224ce89b | 638 | |
f91f0fd5 | 639 | decode_next_lit_len_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1 |
224ce89b WB |
640 | |
641 | ;; Check that enough input was available to decode symbol | |
642 | cmp read_in_length, 0 | |
643 | jl end_of_input | |
644 | ||
f91f0fd5 TL |
645 | multi_symbol_start: |
;; With more than one symbol pending, the current low byte is stored as a
;; literal; only the last symbol of a group is classified below
646 | cmp next_sym_num, 1 | |
647 | jg decode_literal | |
648 | ||
224ce89b WB |
649 | cmp next_sym, 256 |
650 | jl decode_literal | |
651 | je end_symbol | |
652 | ||
653 | decode_len_dist_2: | |
f91f0fd5 | 654 | lea repeat_length, [next_sym - 254] |
224ce89b | 655 | ;; Decode distance code |
f91f0fd5 | 656 | decode_next_dist_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, rcx, tmp1 |
224ce89b WB |
657 | |
658 | ;; Load distance code extra bits | |
659 | mov next_bits, read_in | |
660 | mov look_back_dist %+ d, [rfc_lookup + _dist_start + 4 * next_sym] | |
224ce89b WB |
661 | |
662 | ;; Calculate the look back distance and check for enough input | |
663 | BZHI next_bits, next_bits, rcx, tmp1 | |
664 | SHRX read_in, read_in, rcx | |
665 | add look_back_dist, next_bits | |
666 | sub read_in_length, rcx | |
667 | jl end_of_input | |
668 | ||
669 | ;; Setup code for byte copy using rep movsb | |
;; rdi = destination (next_out), rsi = source (next_out - look_back_dist),
;; rcx = byte count
670 | mov rsi, next_out | |
671 | mov rdi, rsi | |
672 | mov rcx, repeat_length | |
673 | sub rsi, look_back_dist | |
674 | ||
675 | ;; Check if a valid look back distance was decoded | |
676 | cmp rsi, [rsp + start_out_mem_offset] | |
677 | jl invalid_look_back_distance | |
678 | ||
679 | ;; Check for out buffer overflow | |
;; repeat_length is turned into the address one past the end of the copy
680 | add repeat_length, next_out | |
681 | cmp repeat_length, end_out | |
682 | jg out_buffer_overflow_repeat | |
683 | ||
684 | mov next_out, repeat_length | |
685 | ||
686 | rep movsb | |
687 | jmp end_loop_block | |
688 | ||
689 | decode_literal: | |
690 | ;; Store literal decoded from the input stream | |
691 | cmp next_out, end_out | |
692 | jge out_buffer_overflow_lit | |
693 | add next_out, 1 | |
694 | mov byte [next_out - 1], next_sym %+ b | |
;; Move on to the next packed symbol, if any remain
f91f0fd5 TL |
695 | sub next_sym_num, 1 |
696 | jz end_loop_block | |
697 | shr next_sym, 8 | |
698 | jmp multi_symbol_start | |
224ce89b WB |
699 | |
700 | ;; Set exit codes | |
701 | end_of_input: | |
;; Roll back to the snapshot taken at end_loop_block and clear the pending
;; literal overflow fields
702 | mov read_in, [rsp + read_in_mem_offset] | |
703 | mov read_in_length, [rsp + read_in_length_mem_offset] | |
f91f0fd5 TL |
704 | mov next_out, [rsp + next_out_mem_offset] |
705 | xor tmp1, tmp1 | |
706 | mov dword [state + _write_overflow_lits], tmp1 %+ d | |
707 | mov dword [state + _write_overflow_len], tmp1 %+ d | |
224ce89b WB |
708 | mov rax, END_INPUT |
709 | jmp end | |
710 | ||
711 | out_buffer_overflow_repeat: | |
;; Copy only the bytes that fit before end_out; record the remaining length
;; and the distance in the state
712 | mov rcx, end_out | |
713 | sub rcx, next_out | |
714 | sub repeat_length, rcx | |
715 | sub repeat_length, next_out | |
716 | rep movsb | |
717 | ||
718 | mov [state + _copy_overflow_len], repeat_length %+ d | |
719 | mov [state + _copy_overflow_dist], look_back_dist %+ d | |
720 | ||
721 | mov next_out, end_out | |
722 | ||
723 | mov rax, OUT_OVERFLOW | |
724 | jmp end | |
725 | ||
726 | out_buffer_overflow_lit: | |
;; Record the symbols that could not be written and how many there are, then
;; inspect the last symbol of the group: a literal exits directly; otherwise
;; the recorded count is reduced by one and a length continues at
;; decode_len_dist_2, while 256 (end of block) goes to end_state
f91f0fd5 TL |
727 | mov dword [state + _write_overflow_lits], next_sym %+ d |
728 | mov dword [state + _write_overflow_len], next_sym_num %+ d | |
729 | sub next_sym_num, 1 | |
730 | shl next_sym_num, 3 | |
731 | SHRX next_sym, next_sym, next_sym_num | |
224ce89b | 732 | mov rax, OUT_OVERFLOW |
f91f0fd5 TL |
733 | shr next_sym_num, 3 |
734 | cmp next_sym, 256 | |
735 | jl end | |
;; mov does not alter flags, so the jg below still tests the cmp above
736 | mov dword [state + _write_overflow_len], next_sym_num %+ d | |
737 | jg decode_len_dist_2 | |
738 | jmp end_state | |
224ce89b WB |
739 | |
740 | invalid_look_back_distance: | |
741 | mov rax, INVALID_LOOKBACK | |
742 | jmp end | |
743 | ||
;; Targets of decode_next_dist for a zero code-length entry, one per register
;; the macro is invoked with. Each reports end of input instead of an invalid
;; symbol when read_in_length is (signed) less than the value in that register.
f91f0fd5 TL |
744 | invalid_dist_symbol_ %+ next_sym: |
745 | cmp read_in_length, next_sym | |
746 | jl end_of_input | |
747 | jmp invalid_symbol | |
748 | invalid_dist_symbol_ %+ next_sym3: | |
749 | cmp read_in_length, next_sym3 | |
750 | jl end_of_input | |
224ce89b WB |
751 | invalid_symbol: |
752 | mov rax, INVALID_SYMBOL | |
753 | jmp end | |
754 | ||
755 | end_symbol_pre: | |
756 | ;; Fix up in buffer and out buffer to reflect the actual buffer | |
;; next_out is stepped back by one to undo the position counted for the
;; end-of-block symbol by the speculative store
f91f0fd5 | 757 | sub next_out, 1 |
224ce89b WB |
758 | add end_out, OUT_BUFFER_SLOP |
759 | add end_in, IN_BUFFER_SLOP | |
760 | end_symbol: | |
f91f0fd5 TL |
761 | xor rax, rax |
762 | end_state: | |
224ce89b WB |
763 | ;; Set flag identifying a new block is required |
;; If _bfinal is non-zero the block state becomes ISAL_BLOCK_INPUT_DONE instead
764 | mov byte [state + _block_state], ISAL_BLOCK_NEW_HDR | |
f91f0fd5 TL |
765 | cmp dword [state + _bfinal], 0 |
766 | je end | |
767 | mov byte [state + _block_state], ISAL_BLOCK_INPUT_DONE | |
768 | ||
224ce89b WB |
769 | end: |
770 | ;; Save current buffer states | |
771 | mov [state + _read_in], read_in | |
772 | mov [state + _read_in_length], read_in_length %+ d | |
f91f0fd5 TL |
773 | |
774 | ;; Set avail_out | |
224ce89b WB |
775 | sub end_out, next_out |
776 | mov dword [state + _avail_out], end_out %+ d | |
f91f0fd5 TL |
777 | |
778 | ;; Set total_out | |
779 | mov tmp1, next_out | |
780 | sub tmp1, [state + _next_out] | |
781 | add [state + _total_out], tmp1 %+ d | |
782 | ||
783 | ;; Set next_out | |
784 | mov [state + _next_out], next_out | |
785 | ||
786 | ;; Set next_in | |
224ce89b | 787 | mov [state + _next_in], next_in |
f91f0fd5 TL |
788 | |
789 | ;; Set avail_in | |
224ce89b WB |
790 | sub end_in, next_in |
791 | mov [state + _avail_in], end_in %+ d | |
792 | ||
793 | FUNC_RESTORE | |
794 | ||
795 | ret |