]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
5 | ; modification, are permitted provided that the following conditions | |
6 | ; are met: | |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
30 | %include "options.asm" | |
31 | %include "lz0a_const.asm" | |
32 | %include "data_struct2.asm" | |
33 | %include "bitbuf2.asm" | |
34 | %include "huffman.asm" | |
35 | %include "igzip_compare_types.asm" | |
36 | ||
37 | %include "stdmac.asm" | |
38 | %include "reg_sizes.asm" | |
39 | ||
40 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
41 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
42 | ;; Register map. Several names share one register (rax: curr_data/tmp1; rbx: f_index/code/tmp4/tmp5/tmp6; rcx: tmp2/hash; r12: code2/f_end_i), so names that alias each other must not be live at the same time. | |
43 | ||
224ce89b | 44 | %define curr_data rax |
7c673cae FG |
45 | %define tmp1 rax |
46 | ||
47 | %define f_index rbx | |
48 | %define code rbx | |
49 | %define tmp4 rbx | |
50 | %define tmp5 rbx | |
51 | %define tmp6 rbx | |
52 | ||
53 | %define tmp2 rcx | |
54 | %define hash rcx | |
55 | ||
56 | %define tmp3 rdx | |
57 | ||
58 | %define stream rsi | |
59 | ||
60 | %define f_i rdi | |
61 | ||
62 | %define code_len2 rbp | |
63 | ||
64 | %define m_out_buf r8 | |
65 | ||
66 | %define m_bits r9 | |
67 | ||
68 | %define dist r10 | |
69 | ||
70 | %define m_bit_count r11 | |
71 | ||
72 | %define code2 r12 |
7c673cae FG |
73 | %define f_end_i r12 |
74 | ||
75 | %define file_start r13 | |
76 | ||
77 | %define len r14 | |
78 | ||
79 | %define hufftables r15 | |
80 | ||
81 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
82 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
83 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
84 | f_end_i_mem_offset equ 0 ; local variable (8 bytes): stack copy of f_end_i, reloaded via [rsp + f_end_i_mem_offset] | |
85 | stack_size equ 8 | |
86 | ; void isal_deflate_finish ( isal_zstream *stream ) -- exported below as isal_deflate_finish_01 | |
87 | ; arg 1: addr of stream, taken from rcx (when the output format is elf64 it is first copied from rdi into rcx) | |
88 | global isal_deflate_finish_01 | |
89 | isal_deflate_finish_01: | |
90 | PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 | |
91 | sub rsp, stack_size | |
92 | ||
93 | %ifidn __OUTPUT_FORMAT__, elf64 | |
94 | mov rcx, rdi | |
95 | %endif | |
96 | ||
97 | mov stream, rcx | |
98 | ||
99 | ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); here: m_out_start = next_out, m_out_end = next_out + avail_out - SLOP | |
100 | mov m_out_buf, [stream + _next_out] | |
101 | mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf | |
102 | mov tmp1 %+ d, [stream + _avail_out] | |
103 | add tmp1, m_out_buf | |
104 | sub tmp1, SLOP | |
105 | skip_SLOP: | |
106 | mov [stream + _internal_state_bitbuf_m_out_end], tmp1 | |
107 | ||
108 | mov m_bits, [stream + _internal_state_bitbuf_m_bits] | |
109 | mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count] | |
110 | ||
111 | mov hufftables, [stream + _hufftables] | |
112 | ||
224ce89b WB |
113 | mov file_start, [stream + _next_in] |
114 | ||
115 | mov f_i %+ d, dword [stream + _total_in] | |
116 | sub file_start, f_i | |
117 | ||
118 | mov f_end_i %+ d, dword [stream + _avail_in] | |
119 | add f_end_i, f_i | |
120 | ||
121 | sub f_end_i, LAST_BYTES_COUNT | |
7c673cae FG |
122 | mov [rsp + f_end_i_mem_offset], f_end_i |
123 | ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {   (f_end_i was reduced by LAST_BYTES_COUNT just above; signed compare) | |
124 | cmp f_i, f_end_i | |
125 | jge end_loop_2 | |
126 | ||
224ce89b WB |
127 | mov curr_data %+ d, [file_start + f_i] |
128 | ||
129 | cmp dword [stream + _internal_state_has_hist], 0 | |
130 | jne skip_write_first_byte | |
131 | ||
132 | cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] | |
133 | ja end_loop_2 | |
134 | ||
135 | compute_hash hash, curr_data | |
136 | and hash %+ d, HASH_MASK | |
137 | mov [stream + _internal_state_head + 2 * hash], f_i %+ w | |
138 | mov dword [stream + _internal_state_has_hist], 1 | |
139 | jmp encode_literal | |
140 | ||
141 | skip_write_first_byte: | |
7c673cae FG |
142 |
143 | loop2: | |
144 | ; if (state->bitbuf.is_full()) {   i.e. m_out_buf > m_out_end (unsigned): leave the loop | |
145 | cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] | |
146 | ja end_loop_2 | |
147 | ||
148 | ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; | |
224ce89b WB |
149 | mov curr_data %+ d, [file_start + f_i] |
150 | compute_hash hash, curr_data | |
7c673cae FG |
151 | and hash %+ d, HASH_MASK |
152 | ||
153 | ; f_index = state->head[hash]; | |
154 | movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash] | |
155 | ||
156 | ; state->head[hash] = (uint16_t) f_i; | |
157 | mov [stream + _internal_state_head + 2 * hash], f_i %+ w | |
158 | ||
159 | ; dist = f_i - f_index; // mod 64k | |
160 | mov dist %+ d, f_i %+ d | |
161 | sub dist %+ d, f_index %+ d | |
162 | and dist %+ d, 0xFFFF | |
163 | ||
164 | ; if ((dist-1) < (D-1)) {   unsigned test; jae sends (dist-1) >= (D-1) to encode_literal, which also covers dist == 0 because dist-1 wraps | |
165 | mov tmp1 %+ d, dist %+ d | |
166 | sub tmp1 %+ d, 1 | |
167 | cmp tmp1 %+ d, (D-1) | |
168 | jae encode_literal | |
169 | ||
170 | ; len = f_end_i - f_i;   (the stack slot holds the reduced bound, so LAST_BYTES_COUNT is added back) | |
171 | mov tmp4, [rsp + f_end_i_mem_offset] | |
172 | sub tmp4, f_i | |
224ce89b | 173 | add tmp4, LAST_BYTES_COUNT |
7c673cae FG |
174 |
175 | ; if (len > 258) len = 258; | |
176 | cmp tmp4, 258 | |
177 | cmovg tmp4, [c258] | |
178 | ||
179 | ; len = compare(state->file_start + f_i, | |
180 | ; state->file_start + f_i - dist, len); | |
181 | lea tmp1, [file_start + f_i] | |
182 | mov tmp2, tmp1 | |
183 | sub tmp2, dist | |
184 | compare tmp4, tmp1, tmp2, len, tmp3 | |
185 | ||
186 | ; if (len >= SHORTEST_MATCH) { | |
187 | cmp len, SHORTEST_MATCH | |
188 | jb encode_literal | |
189 | ||
190 | ;; encode as dist/len | |
191 | ||
192 | ; get_dist_code(dist, &code2, &code_len2);   (the macro is given dist - 1) | |
224ce89b | 193 | dec dist |
7c673cae FG |
194 | get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx |
195 | ||
196 | ; get_len_code(len, &code, &code_len); | |
197 | get_len_code len, code, rcx, hufftables ;; rcx is code_len | |
198 | ||
199 | ; code2 <<= code_len | |
200 | ; code2 |= code | |
201 | ; code_len2 += code_len | |
224ce89b | 202 | SHLX code2, code2, rcx |
7c673cae FG |
203 | or code2, code |
204 | add code_len2, rcx | |
205 | ||
206 | ; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {   -- as coded here: f_i += len, and the hash updates are skipped when the new f_i >= the reduced f_end_i | |
207 | lea tmp3, [f_i + 1] ; tmp3 <= k | |
208 | add f_i, len | |
224ce89b WB |
209 | cmp f_i, [rsp + f_end_i_mem_offset] |
210 | jae skip_hash_update | |
211 | ||
7c673cae FG |
212 | ; only update hash twice (for k and k+1), not for every position of the match |
213 | ||
214 | ; hash = compute_hash(state->file_start + k) & HASH_MASK; | |
224ce89b | 215 | mov tmp6 %+ d, dword [file_start + tmp3] |
7c673cae FG |
216 | compute_hash hash, tmp6 |
217 | and hash %+ d, HASH_MASK | |
218 | ; state->head[hash] = k; | |
219 | mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w | |
220 | ||
221 | add tmp3, 1 | |
222 | ||
223 | ; hash = compute_hash(state->file_start + k) & HASH_MASK; | |
224ce89b | 224 | mov tmp6 %+ d, dword [file_start + tmp3] |
7c673cae FG |
225 | compute_hash hash, tmp6 |
226 | and hash %+ d, HASH_MASK | |
227 | ; state->head[hash] = k; | |
228 | mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w | |
229 | ||
224ce89b | 230 | skip_hash_update: |
7c673cae FG |
231 | write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5 |
232 | ||
233 | ; continue while f_i < reduced f_end_i (signed compare), otherwise fall out to end_loop_2 | |
234 | cmp f_i, [rsp + f_end_i_mem_offset] | |
235 | jl loop2 | |
236 | jmp end_loop_2 | |
237 | ||
238 | encode_literal: | |
7c673cae FG |
239 | ; get_lit_code(state->file_start[f_i], &code2, &code_len2); |
240 | movzx tmp5, byte [file_start + f_i] | |
241 | get_lit_code tmp5, code2, code_len2, hufftables | |
242 | ||
243 | write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5 | |
244 | ||
245 | ; continue with the next byte while f_i < reduced f_end_i | |
246 | add f_i, 1 | |
247 | cmp f_i, [rsp + f_end_i_mem_offset] | |
248 | jl loop2 | |
249 | ||
250 | end_loop_2: | |
224ce89b WB |
251 | mov f_end_i, [rsp + f_end_i_mem_offset] |
252 | add f_end_i, LAST_BYTES_COUNT | |
253 | mov [rsp + f_end_i_mem_offset], f_end_i | |
7c673cae | 254 | ; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {   (f_end_i is the full end again: LAST_BYTES_COUNT was just added back) |
224ce89b WB |
255 | cmp f_i, f_end_i |
256 | jge write_eob | |
257 | ||
258 | xor tmp5, tmp5 | |
259 | final_bytes: | |
7c673cae FG |
260 | cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] |
261 | ja not_end | |
224ce89b WB |
262 | movzx tmp5, byte [file_start + f_i] |
263 | get_lit_code tmp5, code2, code_len2, hufftables | |
264 | write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 | |
7c673cae | 265 |
224ce89b WB |
266 | inc f_i |
267 | cmp f_i, [rsp + f_end_i_mem_offset] | |
268 | jl final_bytes | |
269 | ||
270 | write_eob: | |
271 | cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] | |
272 | ja not_end | |
7c673cae | 273 |
7c673cae FG |
274 | ; get_lit_code(256, &code2, &code_len2);   (256 is the symbol written before has_eob is set) |
275 | get_lit_code 256, code2, code_len2, hufftables | |
276 | ||
277 | write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1 | |
278 | ||
279 | mov dword [stream + _internal_state_has_eob], 1 | |
280 | cmp dword [stream + _end_of_stream], 1 | |
281 | jne sync_flush | |
282 | ; state->state = ZSTATE_TRL;   (taken when stream->end_of_stream == 1) | |
283 | mov dword [stream + _internal_state_state], ZSTATE_TRL | |
284 | jmp not_end | |
285 | ||
286 | sync_flush: | |
287 | ; state->state = ZSTATE_SYNC_FLUSH; | |
288 | mov dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH | |
289 | ; } | |
290 | not_end: | |
291 | ||
7c673cae | 292 |
224ce89b WB |
293 | ;; Update input buffer: total_in = f_i, next_in = file_start + f_i, avail_in = f_end_i - f_i (f_end_i reloaded from the stack slot) |
294 | mov f_end_i, [rsp + f_end_i_mem_offset] | |
295 | mov [stream + _total_in], f_i %+ d | |
296 | add file_start, f_i | |
297 | mov [stream + _next_in], file_start | |
298 | sub f_end_i, f_i | |
299 | mov [stream + _avail_in], f_end_i %+ d | |
300 | ||
301 | ;; Update output buffer | |
7c673cae FG |
302 | mov [stream + _next_out], m_out_buf |
303 | ; len = state->bitbuf.buffer_used();   computed here as m_out_buf - m_out_start | |
304 | sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start] | |
305 | ||
306 | ; stream->avail_out -= len; | |
307 | sub [stream + _avail_out], m_out_buf %+ d | |
308 | ; stream->total_out += len; | |
309 | add [stream + _total_out], m_out_buf %+ d | |
310 | ||
311 | mov [stream + _internal_state_bitbuf_m_bits], m_bits | |
312 | mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d | |
313 | add rsp, stack_size | |
314 | POP_ALL | |
315 | ret | |
316 | ||
317 | section .data | |
318 | align 8 ; c258 is a qword read as a 64-bit memory operand (cmovg tmp4, [c258]); align it to its access size | |
319 | c258: dq 258 ; upper bound applied to the match length before compare |