1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30 %include "options.asm"
31 %include "lz0a_const.asm"
32 %include "data_struct2.asm"
33 %include "bitbuf2.asm"
34 %include "huffman.asm"
35 %include "igzip_compare_types.asm"
38 %include "reg_sizes.asm"
40 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Register aliases. r13 and r15 are both in the PUSH_ALL list at function entry.
; file_start is loaded from [stream + _next_in]; input is addressed as [file_start + f_i].
75 %define file_start r13
; hufftables is loaded from [stream + _hufftables].
79 %define hufftables r15
; Address expressions relative to level_buf (defined outside this excerpt);
; they are used inside memory operands, e.g. [hash_table + 2 * hash].
; hash_table entries are accessed as 16-bit words indexed by the masked hash.
81 %define hash_table level_buf + _hash8k_hash_table
; Histogram counters, incremented as dwords at [... + HIST_ELEM_SIZE*code].
82 %define lit_len_hist level_buf + _hist_lit_len
83 %define dist_hist level_buf + _hist_dist
85 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
86 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
87 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Offsets from rsp of stack-resident locals.
; [rsp + f_end_i_mem_offset] holds the f_end_i loop bound.
88 f_end_i_mem_offset equ 0 ; local variable (8 bytes)
; Slots for the values read from _internal_state_dist_mask and
; _internal_state_hash_mask; they are reloaded from here as dwords.
91 dist_mask_offset equ 24
92 hash_mask_offset equ 32
; METHOD is pasted into the exported symbol name:
; isal_deflate_icf_finish_ %+ METHOD %+ _01
95 %xdefine METHOD hash_hist
101 ; void isal_deflate_icf_finish ( isal_zstream *stream )
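102 ; arg 1: addr of stream (rcx under the win64 ABI; presumably rdi for other output formats -- see the __OUTPUT_FORMAT__ check below)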
103 global isal_deflate_icf_finish_ %+ METHOD %+ _01
104 isal_deflate_icf_finish_ %+ METHOD %+ _01:
106 PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
109 %ifidn __OUTPUT_FORMAT__, win64
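115 ; state->bitbuf.set_buf(...) equivalent: load the dist/hash masks and take the output position from the ICF buffer in level_buf (icf_buf_next / icf_buf_avail_out)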
116 mov tmp2 %+ d, dword [stream + _internal_state_dist_mask]
117 mov tmp3 %+ d, dword [stream + _internal_state_hash_mask]
118 mov level_buf, [stream + _level_buf]
119 mov m_out_buf, [level_buf + _icf_buf_next]
120 mov [rsp + m_out_start], m_out_buf
121 mov tmp1, [level_buf + _icf_buf_avail_out]
125 mov [rsp + dist_mask_offset], tmp2
126 mov [rsp + hash_mask_offset], tmp3
127 mov [rsp + m_out_end], tmp1
129 mov hufftables, [stream + _hufftables]
131 mov file_start, [stream + _next_in]
133 mov f_i %+ d, dword [stream + _total_in]
136 mov f_end_i %+ d, dword [stream + _avail_in]
139 sub f_end_i, LAST_BYTES_COUNT
140 mov [rsp + f_end_i_mem_offset], f_end_i
141 ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
145 mov curr_data %+ d, [file_start + f_i]
147 cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
148 jne .skip_write_first_byte
150 cmp m_out_buf, [rsp + m_out_end]
153 mov hmask1 %+ d, [rsp + hash_mask_offset]
154 compute_hash hash, curr_data
155 and hash %+ d, hmask1 %+ d
156 mov [hash_table + 2 * hash], f_i %+ w
157 mov byte [stream + _internal_state_has_hist], IGZIP_HIST
160 .skip_write_first_byte:
163 mov tmp3 %+ d, [rsp + dist_mask_offset]
164 mov hmask1 %+ d, [rsp + hash_mask_offset]
165 ; if (state->bitbuf.is_full()) {
166 cmp m_out_buf, [rsp + m_out_end]
169 ; hash = compute_hash(state->file_start + f_i) & hash_mask;
170 mov curr_data %+ d, [file_start + f_i]
171 compute_hash hash, curr_data
172 and hash %+ d, hmask1 %+ d
174 ; f_index = state->head[hash];
175 movzx f_index %+ d, word [hash_table + 2 * hash]
177 ; state->head[hash] = (uint16_t) f_i;
178 mov [hash_table + 2 * hash], f_i %+ w
180 ; dist = f_i - f_index; // mod 64k
181 mov dist %+ d, f_i %+ d
182 sub dist %+ d, f_index %+ d
183 and dist %+ d, 0xFFFF
185 ; if ((dist-1) <= (D-1)) {
186 mov tmp1 %+ d, dist %+ d
188 cmp tmp1 %+ d, tmp3 %+ d
191 ; len = f_end_i - f_i;
192 mov tmp4, [rsp + f_end_i_mem_offset]
194 add tmp4, LAST_BYTES_COUNT
196 ; if (len > 258) len = 258;
200 ; len = compare(state->file_start + f_i,
201 ; state->file_start + f_i - dist, len);
202 lea tmp1, [file_start + f_i]
205 compare tmp4, tmp1, tmp2, len, tmp3
207 ; if (len >= SHORTEST_MATCH) {
208 cmp len, SHORTEST_MATCH
211 ;; encode as dist/len
213 ; get_dist_code(dist, &code2, &code_len2);
215 get_dist_icf_code dist, code2, tmp3 ;; clobbers dist, rcx
218 lea code, [len + 254]
220 mov hmask2 %+ d, [rsp + hash_mask_offset]
223 inc dword [lit_len_hist + HIST_ELEM_SIZE*code]
225 ; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {
226 lea tmp3, [f_i + 1] ; tmp3 <= k
228 cmp f_i, [rsp + f_end_i_mem_offset]
229 jae .skip_hash_update
231 ; only update hash twice
233 ; hash = compute_hash(state->file_start + k) & hash_mask;
234 mov tmp6 %+ d, dword [file_start + tmp3]
235 compute_hash hash, tmp6
236 and hash %+ d, hmask2 %+ d
237 ; state->head[hash] = k;
238 mov [hash_table + 2 * hash], tmp3 %+ w
242 ; hash = compute_hash(state->file_start + k) & hash_mask;
243 mov tmp6 %+ d, dword [file_start + tmp3]
244 compute_hash hash, tmp6
245 and hash %+ d, hmask2 %+ d
246 ; state->head[hash] = k;
247 mov [hash_table + 2 * hash], tmp3 %+ w
250 write_dword code2, m_out_buf
251 shr code2, DIST_OFFSET
253 inc dword [dist_hist + HIST_ELEM_SIZE*code2]
255 cmp f_i, [rsp + f_end_i_mem_offset]
260 ; get_lit_code(state->file_start[f_i], &code2, &code_len2);
261 movzx tmp5, byte [file_start + f_i]
262 inc dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
264 write_dword tmp5, m_out_buf
267 cmp f_i, [rsp + f_end_i_mem_offset]
271 mov f_end_i, [rsp + f_end_i_mem_offset]
272 add f_end_i, LAST_BYTES_COUNT
273 mov [rsp + f_end_i_mem_offset], f_end_i
274 ; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
280 cmp m_out_buf, [rsp + m_out_end]
283 movzx tmp5, byte [file_start + f_i]
284 inc dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
286 write_dword tmp5, m_out_buf
289 cmp f_i, [rsp + f_end_i_mem_offset]
293 cmp word [stream + _end_of_stream], 0
295 cmp word [stream + _flush], _NO_FLUSH
300 mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
302 ;; Update input buffer
303 mov f_end_i, [rsp + f_end_i_mem_offset]
304 mov [stream + _total_in], f_i %+ d
305 mov [stream + _internal_state_block_end], f_i %+ d
308 mov [stream + _next_in], file_start
310 mov [stream + _avail_in], f_end_i %+ d
312 ;; Update output buffer
313 mov [level_buf + _icf_buf_next], m_out_buf
315 ; len = state->bitbuf.buffer_used();
316 sub m_out_buf, [rsp + m_out_start]
318 ; level_buf->icf_buf_avail_out -= len;
319 sub [level_buf + _icf_buf_avail_out], m_out_buf