]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
5 | ; modification, are permitted provided that the following conditions | |
6 | ; are met: | |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
30 | %include "options.asm" | |
7c673cae FG |
31 | |
32 | %include "lz0a_const.asm" | |
33 | %include "data_struct2.asm" | |
34 | %include "bitbuf2.asm" | |
35 | %include "huffman.asm" | |
36 | %include "igzip_compare_types.asm" | |
7c673cae FG |
37 | %include "reg_sizes.asm" |
38 | ||
39 | %include "stdmac.asm" | |
40 | ||
; MARK <name>: when DEBUG is defined, declares <name> global and defines it as
; a label at the point of use; otherwise the macro expands to nothing.
7c673cae FG |
41 | %ifdef DEBUG |
42 | %macro MARK 1 | |
43 | global %1 | |
44 | %1: | |
45 | %endm | |
46 | %else | |
47 | %macro MARK 1 | |
48 | %endm | |
49 | %endif | |
50 | ||
51 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
52 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
53 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
7c673cae | 54 | |
; Symbolic register names used by the routine below. Several names share one
; physical register (e.g. tmp2 and hash2 are both rcx; f_end_i, dist2, tmp7 and
; code4 are all r12), so writing one alias overwrites every other alias of
; that register.
224ce89b WB |
55 | %define tmp2 rcx |
56 | %define hash2 rcx | |
7c673cae | 57 | |
224ce89b WB |
58 | %define curr_data rax |
59 | %define code rax | |
60 | %define tmp5 rax | |
7c673cae | 61 | |
224ce89b WB |
62 | %define tmp4 rbx |
63 | %define dist rbx | |
64 | %define code2 rbx | |
7c673cae | 65 | |
224ce89b WB |
66 | %define hash rdx |
67 | %define len rdx | |
68 | %define code_len3 rdx | |
69 | %define tmp8 rdx | |
7c673cae | 70 | |
224ce89b WB |
71 | %define tmp1 rsi |
72 | %define code_len2 rsi | |
7c673cae | 73 | |
224ce89b | 74 | %define file_start rdi
7c673cae | 75 | |
224ce89b | 76 | %define m_bit_count rbp
7c673cae | 77 | |
224ce89b WB |
78 | %define curr_data2 r8 |
79 | %define len2 r8 | |
80 | %define tmp6 r8 | |
7c673cae | 81 | |
224ce89b | 82 | %define m_bits r9
7c673cae | 83 | |
224ce89b | 84 | %define f_i r10
7c673cae | 85 | |
224ce89b | 86 | %define m_out_buf r11
7c673cae | 87 | |
224ce89b WB |
88 | %define f_end_i r12 |
89 | %define dist2 r12 | |
90 | %define tmp7 r12 | |
91 | %define code4 r12 | |
7c673cae | 92 | |
224ce89b WB |
93 | %define tmp3 r13 |
94 | %define code3 r13 | |
7c673cae | 95 | |
224ce89b | 96 | %define stream r14
7c673cae | 97 | |
224ce89b | 98 | %define hufftables r15
7c673cae | 99 | |
224ce89b WB |
100 | ;; GPR r8 & r15 can be used -- NOTE(review): both already have aliases above (r8: curr_data2/len2/tmp6, r15: hufftables), so this remark looks stale; confirm |
101 | ||
102 | %define xtmp0 xmm0 ; tmp | |
103 | %define xtmp1 xmm1 ; tmp | |
104 | %define xhash xmm2 | |
105 | %define xmask xmm3 | |
106 | %define xdata xmm4 | |
107 | ||
108 | %define ytmp0 ymm0 ; tmp | |
109 | %define ytmp1 ymm1 ; tmp | |
7c673cae | 110 | |
7c673cae | 111 | |
7c673cae FG |
112 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
113 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
114 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
115 | ||
7c673cae FG |
116 | |
; Stack frame layout: byte offsets from rsp once stack_size has been reserved.
; f_end_i_mem_offset is the slot where f_end_i is kept so that r12 can be
; reused for its other aliases and f_end_i reloaded afterwards.
; NOTE(review): blen_mem_offset and xmm_save_mem_offset are not referenced by
; the code visible in this file -- confirm before removing them.
117 | blen_mem_offset equ 0 ; local variable (8 bytes) | |
224ce89b WB |
118 | f_end_i_mem_offset equ 8 |
119 | gpr_save_mem_offset equ 16 ; gpr save area (8*8 bytes) | |
120 | xmm_save_mem_offset equ 16 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned) | |
121 | stack_size equ 2*8 + 8*8 + 4*16 + 8 | |
7c673cae FG |
122 | ;;; trailing 8 because stack address is odd multiple of 8 after a function call and |
123 | ;;; we want it aligned to 16 bytes (2*8 locals + 8*8 gpr save + 4*16 xmm save + 8 = 152) | |
124 | ||
125 | ; void isal_deflate_body ( isal_zstream *stream ) -- exported as isal_deflate_body_ followed by ARCH | |
126 | ; arg 1: rcx: addr of stream (when the output format is elf64 the argument is copied from rdi into rcx first) | |
127 | global isal_deflate_body_ %+ ARCH | |
128 | isal_deflate_body_ %+ ARCH %+ : | |
129 | %ifidn __OUTPUT_FORMAT__, elf64 | |
130 | mov rcx, rdi | |
131 | %endif | |
132 | ||
133 | ;; do nothing if (avail_in == 0): only the next state is stored before returning; no stack frame is set up on this path | |
134 | cmp dword [rcx + _avail_in], 0 | |
135 | jne skip1 | |
136 | ||
137 | ;; Set stream's next state: ZSTATE_FLUSH_READ_BUFFER if end_of_stream != 0 or flush != _NO_FLUSH, otherwise ZSTATE_BODY | |
138 | mov rdx, ZSTATE_FLUSH_READ_BUFFER | |
139 | mov rax, ZSTATE_BODY | |
140 | cmp dword [rcx + _end_of_stream], 0 | |
141 | cmovne rax, rdx | |
142 | cmp dword [rcx + _flush], _NO_FLUSH | |
143 | cmovne rax, rdx | |
144 | mov dword [rcx + _internal_state_state], eax | |
145 | ret | |
; Main path (avail_in != 0): reserve the stack frame, save rbx, rsi, rdi, rbp
; and r12-r15 in the gpr save area, then load the working state from the
; stream structure into registers.
; With ALIGN_STACK, rbp first records the incoming rsp so that rsp can be
; rounded down to a 16-byte boundary; the value stored in the rbp save slot
; is then that frame pointer.
; file_start is set to next_in - total_in, so [file_start + f_i] addresses
; the input byte at stream offset f_i.
146 | skip1: | |
147 | ||
148 | %ifdef ALIGN_STACK | |
149 | push rbp | |
150 | mov rbp, rsp | |
151 | sub rsp, stack_size | |
152 | and rsp, ~15 | |
153 | %else | |
154 | sub rsp, stack_size | |
155 | %endif | |
156 | ||
157 | mov [rsp + gpr_save_mem_offset + 0*8], rbx | |
158 | mov [rsp + gpr_save_mem_offset + 1*8], rsi | |
159 | mov [rsp + gpr_save_mem_offset + 2*8], rdi | |
160 | mov [rsp + gpr_save_mem_offset + 3*8], rbp | |
161 | mov [rsp + gpr_save_mem_offset + 4*8], r12 | |
162 | mov [rsp + gpr_save_mem_offset + 5*8], r13 | |
163 | mov [rsp + gpr_save_mem_offset + 6*8], r14 | |
164 | mov [rsp + gpr_save_mem_offset + 7*8], r15 | |
7c673cae FG |
165 | 
166 | mov stream, rcx | |
7c673cae FG |
167 | mov dword [stream + _internal_state_has_eob], 0 |
168 | ||
224ce89b WB |
169 | MOVDQU xmask, [mask] |
170 | ||
7c673cae FG |
171 | ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); m_out_end is stored as next_out + avail_out - SLOP |
172 | mov m_out_buf, [stream + _next_out] | |
173 | mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf | |
174 | mov tmp1 %+ d, [stream + _avail_out] | |
175 | add tmp1, m_out_buf | |
176 | sub tmp1, SLOP | |
224ce89b | 177 | |
7c673cae FG |
178 | mov [stream + _internal_state_bitbuf_m_out_end], tmp1 |
179 | ||
180 | mov m_bits, [stream + _internal_state_bitbuf_m_bits] | |
181 | mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count] | |
7c673cae | 182 | mov hufftables, [stream + _hufftables]
224ce89b WB |
183 | 
184 | mov file_start, [stream + _next_in] | |
185 | ||
186 | mov f_i %+ d, dword [stream + _total_in] | |
187 | sub file_start, f_i | |
188 | ||
189 | mov f_end_i %+ d, [stream + _avail_in] | |
190 | add f_end_i, f_i | |
191 | ||
192 | ; f_end_i -= LA;  (f_end_i = total_in + avail_in - LA; a copy is kept in the stack slot) | |
7c673cae | 193 | sub f_end_i, LA
7c673cae | 194 | mov [rsp + f_end_i_mem_offset], f_end_i
224ce89b WB |
195 | ; if (f_end_i <= f_i) goto input_end;  (signed compare) |
196 | ||
197 | cmp f_end_i, f_i | |
198 | jle input_end | |
7c673cae FG |
199 | |
; Prime the main loop: xdata and curr_data are loaded from the input at f_i;
; hash is computed from that data and hash2 from the same data shifted right
; by one byte. Both are masked with HASH_MASK. When has_hist is 0 control goes
; to write_first_byte instead of loop2.
200 | ; for (f_i = f_start_i; f_i < f_end_i; f_i++) { | |
224ce89b WB |
201 | MARK __body_compute_hash_ %+ ARCH |
202 | MOVDQU xdata, [file_start + f_i] | |
203 | mov curr_data, [file_start + f_i] | |
204 | mov tmp3, curr_data | |
205 | mov tmp6, curr_data | |
7c673cae | 206 | |
224ce89b | 207 | compute_hash hash, curr_data
7c673cae | 208 | |
224ce89b WB |
209 | shr tmp3, 8 |
210 | compute_hash hash2, tmp3 | |
7c673cae | 211 | |
224ce89b WB |
212 | and hash, HASH_MASK |
213 | and hash2, HASH_MASK | |
7c673cae | 214 | |
224ce89b WB |
215 | cmp dword [stream + _internal_state_has_hist], 0 |
216 | je write_first_byte | |
7c673cae | 217 | |
224ce89b | 218 | jmp loop2
; Main loop. Each iteration examines two positions: f_i (using hash) and
; f_i + 1 (using hash2). head[] is updated for both positions, and the
; iteration ends in one of: a match at the first position (compare_loop /
; len_dist_huffman_pre), a literal followed by a match at the second position
; (compare_loop2 / len_dist_lit_huffman_pre), or two literals (write_lit_bits).
7c673cae FG |
219 | align 16 |
220 | ||
221 | loop2: | |
7c673cae FG |
222 | ; if (state->bitbuf.is_full()) goto output_end;  i.e. m_out_buf > m_out_end (unsigned) |
223 | cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] | |
224ce89b | 224 | ja output_end
7c673cae FG |
225 | 
226 | xor dist, dist | |
227 | xor dist2, dist2 | |
228 | xor tmp3, tmp3 | |
229 | ||
230 | lea tmp1, [file_start + f_i] | |
7c673cae FG |
231 | 
232 | mov dist %+ w, f_i %+ w | |
224ce89b | 233 | dec dist
7c673cae | 234 | sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
7c673cae FG |
235 | mov [stream + _internal_state_head + 2 * hash], f_i %+ w |
236 | ||
237 | inc f_i | |
238 | ||
224ce89b WB |
239 | MOVQ tmp6, xdata |
240 | shr tmp5, 16 | |
241 | mov tmp8, tmp5 | |
242 | compute_hash tmp6, tmp5 | |
243 | ||
7c673cae | 244 | mov dist2 %+ w, f_i %+ w
7c673cae | 245 | dec dist2
224ce89b | 246 | sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
7c673cae FG |
247 | mov [stream + _internal_state_head + 2 * hash2], f_i %+ w |
248 | ||
7c673cae | 249 | ; if ((dist-1) < (D-1)) {  -- no branch is taken here: dist and dist2 are each masked with (D-1) and then negated
224ce89b WB |
250 | and dist %+ d, (D-1) |
251 | neg dist | |
7c673cae | 252 | |
224ce89b WB |
253 | shr tmp8, 8 |
254 | compute_hash tmp2, tmp8 | |
7c673cae | 255 | |
224ce89b WB |
256 | and dist2 %+ d, (D-1) |
257 | neg dist2 | |
7c673cae | 258 | |
224ce89b | 259 | MARK __body_compare_ %+ ARCH
7c673cae | 260 | ;; Check for long len/dist match (>7) with first literal: all 8 bytes XOR to zero -> compare_loop
224ce89b WB |
261 | MOVQ len, xdata |
262 | mov curr_data, len | |
263 | PSRLDQ xdata, 1 | |
264 | xor len, [tmp1 + dist - 1] | |
7c673cae FG |
265 | jz compare_loop |
266 | ||
224ce89b WB |
267 | MOVD xhash, tmp6 %+ d |
268 | PINSRD xhash, tmp2 %+ d, 1 | |
269 | PAND xhash, xhash, xmask | |
7c673cae FG |
270 | 
271 | ;; Check for len/dist match (>7) with second literal: xdata was shifted down one byte above, all 8 bytes XOR to zero -> compare_loop2 | |
224ce89b WB |
272 | MOVQ len2, xdata |
273 | xor len2, [tmp1 + dist2] | |
7c673cae FG |
274 | jz compare_loop2 |
275 | ||
7c673cae FG |
276 | ;; Speculatively load the code for the first literal |
277 | movzx tmp1, curr_data %+ b | |
278 | get_lit_code tmp1, code3, rcx, hufftables | |
279 | ||
280 | ;; Check for len/dist match for first literal (low 4 bytes of the XOR are zero) | |
224ce89b | 281 | test len %+ d, 0xFFFFFFFF
7c673cae FG |
282 | jz len_dist_huffman_pre |
283 | ||
284 | ;; Speculatively load the code for the second literal, then combine both literal codes into code2/code_len2 | |
285 | shr curr_data, 8 | |
286 | and curr_data, 0xff | |
287 | get_lit_code curr_data, code2, code_len2, hufftables | |
288 | ||
224ce89b | 289 | SHLX code2, code2, rcx
7c673cae FG |
290 | or code2, code3 |
291 | add code_len2, rcx | |
292 | ||
293 | ;; Check for len/dist match for second literal (low 4 bytes of the XOR non-zero -> write both literals) | |
224ce89b | 294 | test len2 %+ d, 0xFFFFFFFF
7c673cae FG |
295 | jnz write_lit_bits |
296 | ||
; Literal followed by a match at the second position.
; The _pre entry (fall-through from loop2) saves the literal's code length
; from rcx into code_len3 and derives len2 from the XOR result: index of the
; lowest set bit divided by 8, i.e. the number of equal leading bytes.
; The literal code (code3), the length code and the distance code are merged
; into code4/code_len2 and emitted with a single write_bits. head[] is then
; updated for further positions and hash/hash2 are recomputed at the new f_i.
224ce89b | 297 | MARK __body_len_dist_lit_huffman_ %+ ARCH
7c673cae FG |
298 | len_dist_lit_huffman_pre: |
299 | mov code_len3, rcx | |
7c673cae FG |
300 | bsf len2, len2 |
301 | shr len2, 3 | |
302 | ||
303 | len_dist_lit_huffman: | |
224ce89b WB |
304 | neg dist2 |
305 | ||
7c673cae FG |
306 | %ifndef LONGER_HUFFTABLE |
307 | mov tmp4, dist2 | |
308 | get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx | |
309 | %else | |
224ce89b | 310 | get_dist_code dist2, code4, code_len2, hufftables
7c673cae FG |
311 | %endif |
312 | get_len_code len2, code, rcx, hufftables ;; rcx is code_len | |
313 | ||
224ce89b | 314 | SHLX code4, code4, rcx
7c673cae FG |
315 | or code4, code |
316 | add code_len2, rcx | |
317 | ||
224ce89b WB |
318 | add f_i, len2 |
319 | neg len2 | |
7c673cae | 320 | |
224ce89b WB |
321 | MOVQ tmp5, xdata |
322 | shr tmp5, 24 | |
323 | compute_hash tmp4, tmp5 | |
324 | and tmp4, HASH_MASK | |
325 | ||
326 | SHLX code4, code4, code_len3 | |
7c673cae | 327 | or code4, code3
224ce89b | 328 | add code_len2, code_len3
7c673cae | 329 | |
7c673cae | 330 | ;; Setup for updating hash (len2 was negated above, so f_i + len2 + 1 is the literal's position + 2)
224ce89b | 331 | lea tmp3, [f_i + len2 + 1] ; tmp3 <= k
7c673cae | 332 | |
224ce89b WB |
333 | MOVDQU xdata, [file_start + f_i] |
334 | mov curr_data, [file_start + f_i] | |
335 | mov curr_data2, curr_data | |
7c673cae | 336 | |
224ce89b WB |
337 | MOVD hash %+ d, xhash |
338 | PEXTRD hash2 %+ d, xhash, 1 | |
7c673cae FG |
339 | mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w |
340 | ||
224ce89b WB |
341 | compute_hash hash, curr_data |
342 | ||
7c673cae | 343 | add tmp3,1
224ce89b | 344 | mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
7c673cae | 345 | |
224ce89b WB |
346 | add tmp3, 1 |
347 | mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w | |
348 | ||
349 | write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf, tmp4 | |
350 | mov f_end_i, [rsp + f_end_i_mem_offset] | |
351 | ||
352 | shr curr_data2, 8 | |
353 | compute_hash hash2, curr_data2 | |
354 | ||
355 | %ifdef NO_LIMIT_HASH_UPDATE | |
356 | loop3: | |
357 | add tmp3,1 | |
358 | cmp tmp3, f_i | |
359 | jae loop3_done | |
360 | mov tmp6, [file_start + tmp3] | |
361 | compute_hash tmp4, tmp6 | |
362 | and tmp4 %+ d, HASH_MASK | |
363 | ; state->head[hash] = k;  (repeated for every remaining position k < f_i) | |
364 | mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w | |
365 | jmp loop3 | |
366 | loop3_done: | |
367 | %endif | |
368 | ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; | |
369 | and hash %+ d, HASH_MASK | |
370 | and hash2 %+ d, HASH_MASK | |
371 | ||
372 | ; continue while f_i < f_end_i (signed), else finish at input_end | |
373 | cmp f_i, f_end_i | |
374 | jl loop2 | |
375 | jmp input_end | |
7c673cae FG |
376 | ;; encode as dist/len: match at the first position, so only a length code and a distance code are written (no literal) |
377 | ||
224ce89b | 378 | MARK __body_len_dist_huffman_ %+ ARCH
7c673cae FG |
379 | len_dist_huffman_pre: |
380 | bsf len, len | |
381 | shr len, 3 | |
224ce89b | 382 | |
7c673cae | 383 | len_dist_huffman:
224ce89b WB |
384 | dec f_i |
385 | neg dist | |
7c673cae FG |
386 | 
387 | ; get_dist_code(dist, &code2, &code_len2); | |
388 | %ifndef LONGER_HUFFTABLE | |
389 | mov tmp3, dist ; since code2 and dist are rbx | |
390 | get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx | |
391 | %else | |
224ce89b | 392 | get_dist_code dist, code2, code_len2, hufftables
7c673cae FG |
393 | %endif |
394 | ; get_len_code(len, &code, &code_len); | |
395 | get_len_code len, code, rcx, hufftables ;; rcx is code_len | |
396 | ||
397 | ; code2 <<= code_len | |
398 | ; code2 |= code | |
399 | ; code_len2 += code_len | |
224ce89b | 400 | SHLX code2, code2, rcx
7c673cae FG |
401 | or code2, code |
402 | add code_len2, rcx | |
403 | ||
404 | ;; Setup for updating hash (f_i was decremented back to the match start above) | |
405 | lea tmp3, [f_i + 2] ; tmp3 <= k | |
406 | add f_i, len | |
7c673cae | 407 | |
224ce89b WB |
408 | MOVD hash %+ d, xhash |
409 | PEXTRD hash2 %+ d, xhash, 1 | |
410 | mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w | |
411 | add tmp3,1 | |
412 | mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w | |
413 | ||
414 | MOVDQU xdata, [file_start + f_i] | |
415 | mov curr_data, [file_start + f_i] | |
7c673cae FG |
416 | mov curr_data2, curr_data |
417 | compute_hash hash, curr_data | |
7c673cae | 418 | |
224ce89b WB |
419 | write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp7 |
420 | mov f_end_i, [rsp + f_end_i_mem_offset] | |
7c673cae | 421 | |
224ce89b WB |
422 | shr curr_data2, 8 |
423 | compute_hash hash2, curr_data2 | |
424 | ||
425 | %ifdef NO_LIMIT_HASH_UPDATE | |
426 | loop4: | |
427 | add tmp3,1 | |
7c673cae | 428 | cmp tmp3, f_i
224ce89b WB |
429 | jae loop4_done |
430 | mov tmp6, [file_start + tmp3] | |
431 | compute_hash tmp4, tmp6 | |
432 | and tmp4, HASH_MASK | |
433 | mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w | |
434 | jmp loop4 | |
435 | loop4_done: | |
7c673cae FG |
436 | %endif |
437 | ||
224ce89b WB |
438 | ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; |
439 | and hash %+ d, HASH_MASK | |
440 | and hash2 %+ d, HASH_MASK | |
7c673cae FG |
441 | 
442 | ; continue while f_i < f_end_i (signed), else finish at input_end | |
443 | cmp f_i, f_end_i | |
444 | jl loop2 | |
224ce89b | 445 | jmp input_end
7c673cae | 446 | |
; Literal-only path. code2/code_len2 holds either two combined literal codes
; (when reached from loop2) or a single literal code (when reached from
; write_first_byte). f_i advances by one here, xdata/curr_data are reloaded at
; the new position, and hash/hash2 are taken from the two low dwords of xhash.
; f_end_i is reloaded from its stack slot before the loop test.
224ce89b | 447 | MARK __body_write_lit_bits_ %+ ARCH
7c673cae | 448 | write_lit_bits:
224ce89b | 449 | MOVDQU xdata, [file_start + f_i + 1]
7c673cae FG |
450 | mov f_end_i, [rsp + f_end_i_mem_offset] |
451 | add f_i, 1 | |
224ce89b | 452 | mov curr_data, [file_start + f_i]
7c673cae | 453 | |
224ce89b | 454 | MOVD hash %+ d, xhash
7c673cae FG |
455 | 
456 | write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 | |
457 | ||
224ce89b WB |
458 | PEXTRD hash2 %+ d, xhash, 1 |
459 | ||
7c673cae FG |
460 | ; continue while f_i < f_end_i (signed); otherwise fall through to input_end |
461 | cmp f_i, f_end_i | |
462 | jl loop2 | |
463 | ||
; input_end: input consumed up to f_end_i. The next state is chosen with the
; same rule as the early-out path at function entry (ZSTATE_FLUSH_READ_BUFFER
; if end_of_stream != 0 or flush != _NO_FLUSH, else ZSTATE_BODY).
; output_end: common exit, also entered directly when the output buffer is
; full (in which case the state is left unchanged). Writes the input/output
; positions and the bit buffer back to the stream, restores the saved
; registers and releases the stack frame.
224ce89b | 464 | input_end:
7c673cae FG |
465 | mov tmp1, ZSTATE_FLUSH_READ_BUFFER |
466 | mov tmp5, ZSTATE_BODY | |
467 | cmp dword [stream + _end_of_stream], 0 | |
468 | cmovne tmp5, tmp1 | |
469 | cmp dword [stream + _flush], _NO_FLUSH | |
470 | cmovne tmp5, tmp1 | |
471 | mov dword [stream + _internal_state_state], tmp5 %+ d | |
224ce89b WB |
472 | 
473 | output_end: | |
474 | ;; update input buffer (LA is added back to f_end_i so that avail_in = original end - f_i) | |
475 | add f_end_i, LA | |
476 | mov [stream + _total_in], f_i %+ d | |
477 | add file_start, f_i | |
478 | mov [stream + _next_in], file_start | |
479 | sub f_end_i, f_i | |
480 | mov [stream + _avail_in], f_end_i %+ d | |
481 | ||
482 | ;; update output buffer (after the subtraction m_out_buf holds the number of bytes written since m_out_start) | |
7c673cae | 483 | mov [stream + _next_out], m_out_buf
7c673cae FG |
484 | sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start] |
485 | sub [stream + _avail_out], m_out_buf %+ d | |
486 | add [stream + _total_out], m_out_buf %+ d | |
487 | ||
488 | mov [stream + _internal_state_bitbuf_m_bits], m_bits | |
489 | mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d | |
490 | ||
7c673cae FG |
491 | mov rbx, [rsp + gpr_save_mem_offset + 0*8] |
492 | mov rsi, [rsp + gpr_save_mem_offset + 1*8] | |
493 | mov rdi, [rsp + gpr_save_mem_offset + 2*8] | |
494 | mov rbp, [rsp + gpr_save_mem_offset + 3*8] | |
495 | mov r12, [rsp + gpr_save_mem_offset + 4*8] | |
496 | mov r13, [rsp + gpr_save_mem_offset + 5*8] | |
497 | mov r14, [rsp + gpr_save_mem_offset + 6*8] | |
498 | mov r15, [rsp + gpr_save_mem_offset + 7*8] | |
7c673cae FG |
499 | 
500 | %ifndef ALIGN_STACK | |
501 | add rsp, stack_size | |
502 | %else | |
503 | mov rsp, rbp | |
504 | pop rbp | |
505 | %endif | |
506 | ret | |
507 | ||
; compare_loop / compare_loop2: entered from loop2 when all 8 bytes at the
; first / second position equal the bytes at the candidate address.
; The compare macro selected by COMPARE_TYPE receives the current pointer
; (tmp1), the candidate pointer (tmp2) and the len / len2 register.
; NOTE(review): presumably the macro leaves the full match length in that
; register -- the macros are defined outside this file; confirm there.
; compare_loop builds xhash itself because loop2 branches here before its own
; xhash setup; compare_loop2 additionally looks up the literal code for the
; low byte of curr_data into code3/code_len3 before joining
; len_dist_lit_huffman.
224ce89b | 508 | MARK __body_compare_loops_ %+ ARCH
7c673cae | 509 | compare_loop:
224ce89b WB |
510 | MOVD xhash, tmp6 %+ d |
511 | PINSRD xhash, tmp2 %+ d, 1 | |
512 | PAND xhash, xhash, xmask | |
513 | lea tmp2, [tmp1 + dist - 1] | |
7c673cae FG |
514 | %if (COMPARE_TYPE == 1) |
515 | compare250 tmp1, tmp2, len, tmp3 | |
516 | %elif (COMPARE_TYPE == 2) | |
517 | compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1 | |
518 | %elif (COMPARE_TYPE == 3) | |
519 | compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1 | |
520 | %else | |
521 | %error Unknown Compare type COMPARE_TYPE | |
522 | % error | |
523 | %endif | |
524 | jmp len_dist_huffman | |
525 | ||
526 | compare_loop2: | |
224ce89b WB |
527 | lea tmp2, [tmp1 + dist2] |
528 | add tmp1, 1 | |
7c673cae FG |
529 | %if (COMPARE_TYPE == 1) |
530 | compare250 tmp1, tmp2, len2, tmp3 | |
531 | %elif (COMPARE_TYPE == 2) | |
532 | compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1 | |
533 | %elif (COMPARE_TYPE == 3) | |
534 | compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1 | |
535 | %else | |
536 | %error Unknown Compare type COMPARE_TYPE | |
537 | % error | |
538 | %endif | |
539 | and curr_data, 0xff | |
540 | get_lit_code curr_data, code3, code_len3, hufftables | |
541 | jmp len_dist_lit_huffman | |
542 | ||
; write_first_byte: taken when has_hist is 0. Exits through output_end if the
; output buffer is already full; otherwise sets has_hist, records f_i in
; head[hash], moves the hashes forward by one position (hash <- hash2, hash2
; recomputed from the data shifted right by 16 bits) into xhash, and emits
; the low byte of curr_data as a single literal through write_lit_bits.
543 | MARK __write_first_byte_ %+ ARCH | |
544 | write_first_byte: | |
545 | cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] | |
224ce89b WB |
546 | ja output_end |
547 | ||
548 | mov dword [stream + _internal_state_has_hist], 1 | |
7c673cae | 549 | |
7c673cae | 550 | mov [stream + _internal_state_head + 2 * hash], f_i %+ w
224ce89b WB |
551 | 
552 | mov hash, hash2 | |
553 | shr tmp6, 16 | |
554 | compute_hash hash2, tmp6 | |
555 | ||
556 | MOVD xhash, hash %+ d | |
557 | PINSRD xhash, hash2 %+ d, 1 | |
558 | PAND xhash, xhash, xmask | |
559 | ||
7c673cae FG |
560 | and curr_data, 0xff |
561 | get_lit_code curr_data, code2, code_len2, hufftables | |
562 | jmp write_lit_bits | |
563 | ||
; Constants. mask holds four dwords of HASH_MASK; it is loaded into xmask at
; function start and ANDed with xhash.
; NOTE(review): const_D is not referenced by the code visible in this file.
564 | section .data | |
224ce89b WB |
565 | align 16 |
566 | mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK | |
7c673cae | 567 | const_D: dq D