git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/isa-l/igzip/igzip_finish.asm
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / isa-l / igzip / igzip_finish.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %include "options.asm"
31 %include "lz0a_const.asm"
32 %include "data_struct2.asm"
33 %include "bitbuf2.asm"
34 %include "huffman.asm"
35 %include "igzip_compare_types.asm"
36
37 %include "stdmac.asm"
38 %include "reg_sizes.asm"
39
40 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43
; Symbolic register names used by isal_deflate_finish_01.
; Several names map to the same physical register; names that share a
; register cannot hold live values at the same time.

; rax: the 4 input bytes loaded for hashing (curr_data) / general scratch
44 %define curr_data rax
45 %define tmp1 rax
46
; rbx: previous position read from the head table (f_index), length code
; (code), and scratch (tmp4 = remaining length, tmp5 = literal byte,
; tmp6 = bytes hashed after a match)
47 %define f_index rbx
48 %define code rbx
49 %define tmp4 rbx
50 %define tmp5 rbx
51 %define tmp6 rbx
52
; rcx: hash table index / scratch; the routine also uses rcx by name for the
; incoming argument and for the length-code bit count
53 %define tmp2 rcx
54 %define hash rcx
55
; rdx: scratch (distance mask, later the position k being re-hashed)
56 %define tmp3 rdx
57
; rsi: pointer to the stream structure
58 %define stream rsi
59
; rdi: current input offset
60 %define f_i rdi
61
; rbp: bit length of the code being emitted / hash mask in the main loop
62 %define code_len2 rbp
63 %define hmask1 rbp
64
; r8: current output write pointer
65 %define m_out_buf r8
66
; r9: pending output bits
67 %define m_bits r9
68
; r10: match distance / hash mask used after a match has been encoded
69 %define dist r10
70 %define hmask2 r10
71
; r11: number of pending output bits
72 %define m_bit_count r11
73
; r12: code being emitted / end-of-input offset (the routine also keeps the
; end offset in a stack slot because r12 is reused for code2)
74 %define code2 r12
75 %define f_end_i r12
76
; r13: next_in - total_in, so [file_start + f_i] addresses input offset f_i
77 %define file_start r13
78
; r14: match length
79 %define len r14
80
; r15: pointer loaded from stream->hufftables
81 %define hufftables r15
82
83 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
84 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
85 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Stack frame layout for isal_deflate_finish_01:
; f_end_i_mem_offset is the rsp-relative offset of the saved loop bound,
; stack_size is the number of bytes subtracted from rsp for locals.
86 f_end_i_mem_offset equ 0 ; local variable (8 bytes)
87 stack_size equ 8
;
; isal_deflate_finish_01
;
; Compresses the input that remains in the stream: a hash-table driven
; main loop emits literal or length/distance codes, the trailing
; LAST_BYTES_COUNT bytes are emitted as literals, and finally the
; end-of-block symbol (literal code 256) is written. Every stage stops
; early once the output pointer has passed m_out_end, in which case the
; stream fields are still written back so the caller can resume.
;
; In:  stream pointer in rcx (elf64 builds receive it in rdi and copy it).
; Out: no return value is set; results are stored into the stream structure.
; Registers named in PUSH_ALL are presumably saved there and restored by
; POP_ALL (macros defined outside this file).
88 ; void isal_deflate_finish ( isal_zstream *stream )
89 ; arg 1: rcx: addr of stream
90 global isal_deflate_finish_01
91 isal_deflate_finish_01:
92 PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
93 sub rsp, stack_size
94
; For elf64 the argument arrives in rdi; move it to rcx so the code below
; is the same for both output formats (rdi is reused as f_i afterwards).
95 %ifidn __OUTPUT_FORMAT__, elf64
96 mov rcx, rdi
97 %endif
98
99 mov stream, rcx
100
101 ; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
102 mov m_out_buf, [stream + _next_out]
103 mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
; m_out_end = next_out + avail_out - SLOP (avail_out is read as 32 bits)
104 mov tmp1 %+ d, [stream + _avail_out]
105 add tmp1, m_out_buf
106 sub tmp1, SLOP
; NOTE(review): no jump to skip_SLOP is visible in this routine.
107 skip_SLOP:
108 mov [stream + _internal_state_bitbuf_m_out_end], tmp1
109
; Load the pending bit buffer contents and bit count.
110 mov m_bits, [stream + _internal_state_bitbuf_m_bits]
111 mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
112
113 mov hufftables, [stream + _hufftables]
114
; file_start = next_in - total_in, so that [file_start + f_i] is the input
; byte at stream offset f_i. Only 32 bits of total_in are read.
115 mov file_start, [stream + _next_in]
116
117 mov f_i %+ d, dword [stream + _total_in]
118 sub file_start, f_i
119
; f_end_i = total_in + avail_in
120 mov f_end_i %+ d, dword [stream + _avail_in]
121 add f_end_i, f_i
122
; The main loop stops LAST_BYTES_COUNT bytes before the end of the input.
; The reduced bound is kept on the stack because r12 is reused as code2.
123 sub f_end_i, LAST_BYTES_COUNT
124 mov [rsp + f_end_i_mem_offset], f_end_i
125 ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
; Signed compare: a bound that went negative above also skips the loop.
126 cmp f_i, f_end_i
127 jge end_loop_2
128
129 mov curr_data %+ d, [file_start + f_i]
130
; With no history yet, the first byte cannot be matched: record its hash,
; mark history as present and emit it as a literal.
131 cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
132 jne skip_write_first_byte
133
; Leave if the output pointer is already past m_out_end.
134 cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
135 ja end_loop_2
136 mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
137 compute_hash hash, curr_data
138 and hash %+ d, hmask1 %+ d
; head[] holds 16-bit entries: store the low 16 bits of f_i.
139 mov [stream + _internal_state_head + 2 * hash], f_i %+ w
140 mov byte [stream + _internal_state_has_hist], IGZIP_HIST
141 jmp encode_literal
142
143 skip_write_first_byte:
144
; Main loop: one literal or one length/distance pair per iteration.
145 loop2:
; The masks are reloaded each iteration; rdx and rbp are reused for other
; values further down in the loop body.
146 mov tmp3 %+ d, dword [stream + _internal_state_dist_mask]
147 mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
148 ; if (state->bitbuf.is_full()) {
149 cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
150 ja end_loop_2
151
152 ; hash = compute_hash(state->file_start + f_i) & hash_mask;
153 mov curr_data %+ d, [file_start + f_i]
154 compute_hash hash, curr_data
155 and hash %+ d, hmask1 %+ d
156
157 ; f_index = state->head[hash];
158 movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
159
160 ; state->head[hash] = (uint16_t) f_i;
161 mov [stream + _internal_state_head + 2 * hash], f_i %+ w
162
163 ; dist = f_i - f_index; // mod 64k
164 mov dist %+ d, f_i %+ d
165 sub dist %+ d, f_index %+ d
166 and dist %+ d, 0xFFFF
167
168 ; if ((dist-1) <= (D-1)) {
; Unsigned test of (dist - 1) against the distance mask: dist == 0 wraps to
; 0xFFFFFFFF and therefore also takes the literal path.
169 mov tmp1 %+ d, dist %+ d
170 sub tmp1 %+ d, 1
171 cmp tmp1 %+ d, tmp3 %+ d
172 jae encode_literal
173
174 ; len = f_end_i - f_i;
; The stacked bound was reduced by LAST_BYTES_COUNT, so add it back to get
; the number of bytes left up to the real end of the input.
175 mov tmp4, [rsp + f_end_i_mem_offset]
176 sub tmp4, f_i
177 add tmp4, LAST_BYTES_COUNT
178
179 ; if (len > 258) len = 258;
; cmov has no immediate form, so the limit is loaded from memory (c258).
180 cmp tmp4, 258
181 cmovg tmp4, [c258]
182
183 ; len = compare(state->file_start + f_i,
184 ; state->file_start + f_i - dist, len);
; tmp1 = current position, tmp2 = candidate position dist bytes earlier.
; compare is a macro defined elsewhere; presumably it leaves the match
; length in len and uses tmp3 as scratch - TODO confirm.
185 lea tmp1, [file_start + f_i]
186 mov tmp2, tmp1
187 sub tmp2, dist
188 compare tmp4, tmp1, tmp2, len, tmp3
189
190 ; if (len >= SHORTEST_MATCH) {
191 cmp len, SHORTEST_MATCH
192 jb encode_literal
193
194 ;; encode as dist/len
195
196 ; get_dist_code(dist, &code2, &code_len2);
; The macro is handed dist - 1.
197 dec dist
198 get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx
199
200 ; get_len_code(len, &code, &code_len);
201 get_len_code len, code, rcx, hufftables ;; rcx is code_len
202
; hmask2 shares r10 with dist, which is not used again in this iteration.
203 mov hmask2 %+ d, dword [stream + _internal_state_hash_mask]
204 ; code2 <<= code_len
205 ; code2 |= code
206 ; code_len2 += code_len
; The length code ends up in the low bits, the distance code above it.
207 SHLX code2, code2, rcx
208 or code2, code
209 add code_len2, rcx
210
211 ; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {
; As written, f_i is advanced by len; the hash updates are skipped when the
; new f_i has reached the reduced loop bound.
212 lea tmp3, [f_i + 1] ; tmp3 <= k
213 add f_i, len
214 cmp f_i, [rsp + f_end_i_mem_offset]
215 jae skip_hash_update
216
217 ; only update hash twice
; Positions k and k+1 of the matched span are entered into the head table.
218
219 ; hash = compute_hash(state->file_start + k) & hash_mask;
220 mov tmp6 %+ d, dword [file_start + tmp3]
221 compute_hash hash, tmp6
222 and hash %+ d, hmask2 %+ d
223 ; state->head[hash] = k;
224 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
225
226 add tmp3, 1
227
228 ; hash = compute_hash(state->file_start + k) & hash_mask;
229 mov tmp6 %+ d, dword [file_start + tmp3]
230 compute_hash hash, tmp6
231 and hash %+ d, hmask2 %+ d
232 ; state->head[hash] = k;
233 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
234
235 skip_hash_update:
; Emit the combined length/distance code (write_bits is defined elsewhere).
236 write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
237
238 ; continue
239 cmp f_i, [rsp + f_end_i_mem_offset]
240 jl loop2
241 jmp end_loop_2
242
; Emit the byte at f_i as a literal and advance by one.
243 encode_literal:
244 ; get_lit_code(state->file_start[f_i], &code2, &code_len2);
245 movzx tmp5, byte [file_start + f_i]
246 get_lit_code tmp5, code2, code_len2, hufftables
247
248 write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
249
250 ; continue
251 add f_i, 1
252 cmp f_i, [rsp + f_end_i_mem_offset]
253 jl loop2
254
; Every exit from the main loop lands here. Restore the real end offset
; (undo the LAST_BYTES_COUNT reduction) and keep it in the stack slot for
; the code below.
255 end_loop_2:
256 mov f_end_i, [rsp + f_end_i_mem_offset]
257 add f_end_i, LAST_BYTES_COUNT
258 mov [rsp + f_end_i_mem_offset], f_end_i
259 ; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
260 cmp f_i, f_end_i
261 jge write_eob
262
; NOTE(review): tmp5 is overwritten by the movzx below before it is read,
; so this xor looks redundant - confirm before removing.
263 xor tmp5, tmp5
; Remaining bytes are emitted as literals, one per iteration, checking
; the output pointer against m_out_end each time.
264 final_bytes:
265 cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
266 ja not_end
267 movzx tmp5, byte [file_start + f_i]
268 get_lit_code tmp5, code2, code_len2, hufftables
269 write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
270
271 inc f_i
272 cmp f_i, [rsp + f_end_i_mem_offset]
273 jl final_bytes
274
; All input consumed: write the end-of-block symbol if the output pointer
; has not passed m_out_end.
275 write_eob:
276 cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
277 ja not_end
278
279 ; get_lit_code(256, &code2, &code_len2);
280 get_lit_code 256, code2, code_len2, hufftables
281
282 write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
283
; Record that the end-of-block was written, then pick the next state:
; ZSTATE_TRL when end_of_stream == 1, otherwise ZSTATE_SYNC_FLUSH.
284 mov byte [stream + _internal_state_has_eob], 1
285 cmp word [stream + _end_of_stream], 1
286 jne sync_flush
287 ; state->state = ZSTATE_TRL;
288 mov dword [stream + _internal_state_state], ZSTATE_TRL
289 jmp not_end
290
291 sync_flush:
292 ; state->state = ZSTATE_SYNC_FLUSH;
293 mov dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH
294 ; }
; Common exit: write the updated positions back into the stream.
295 not_end:
296
297
298 ;; Update input buffer
; total_in = f_i (low 32 bits), next_in = file_start + f_i,
; avail_in = f_end_i - f_i (the stack slot holds the real end here).
299 mov f_end_i, [rsp + f_end_i_mem_offset]
300 mov [stream + _total_in], f_i %+ d
301 add file_start, f_i
302 mov [stream + _next_in], file_start
303 sub f_end_i, f_i
304 mov [stream + _avail_in], f_end_i %+ d
305
306 ;; Update output buffer
307 mov [stream + _next_out], m_out_buf
308 ; len = state->bitbuf.buffer_used();
; m_out_buf now holds the number of bytes written during this call.
309 sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
310
311 ; stream->avail_out -= len;
312 sub [stream + _avail_out], m_out_buf %+ d
313 ; stream->total_out += len;
314 add [stream + _total_out], m_out_buf %+ d
315
; Save the pending bits and bit count so they are available on the next call.
316 mov [stream + _internal_state_bitbuf_m_bits], m_bits
317 mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
; Release the local slot and restore the registers saved on entry.
318 add rsp, stack_size
319 POP_ALL
320 ret
321
322 section .data
; c258 is the maximum match length (258). It lives in memory because it is
; the source operand of "cmovg tmp4, [c258]", and cmov takes no immediate.
; That load is 64 bits wide, so the qword is given its natural 8-byte
; alignment (previously 4) to rule out a cache-line-straddling access.
323 align 8
324 c258: dq 258