]> git.proxmox.com Git - ceph.git/blob - ceph/src/isa-l/igzip/igzip_icf_finish.asm
update sources to v12.1.1
[ceph.git] / ceph / src / isa-l / igzip / igzip_icf_finish.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %include "options.asm"
31 %include "lz0a_const.asm"
32 %include "data_struct2.asm"
33 %include "bitbuf2.asm"
34 %include "huffman.asm"
35 %include "igzip_compare_types.asm"
36
37 %include "stdmac.asm"
38 %include "reg_sizes.asm"
39
40 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43
44 %define curr_data rax ; 4 input bytes loaded from file_start + f_i and fed to compute_hash
45 %define tmp1 rax ; scratch: level_buf pointer, output limit, dist-1 range check, match source pointer
46 ; rbx carries five different names below; only one of them is live at any time
47 %define f_index rbx ; previous position read from the 16-bit head[] entry for the current hash
48 %define code rbx ; length symbol (len + 254), used to index the lit/len histogram
49 %define tmp4 rbx ; maximum match length handed to the compare macro
50 %define tmp5 rbx ; literal byte, then the literal word passed to write_dword
51 %define tmp6 rbx ; 4 input bytes hashed for the post-match head[] updates
52
53 %define tmp2 rcx ; match candidate pointer (file_start + f_i - dist)
54 %define hash rcx ; hash value masked with HASH_MASK, index into head[]
55
56 %define tmp3 rdx ; scratch for compare / get_dist_icf_code; position k in the post-match hash updates
57
58 %define stream rsi ; stream pointer, copied from the first-argument register at entry
59
60 %define f_i rdi ; current input index; file_start + f_i addresses the current byte
61
62 %define code_len2 rbp ; only mentioned in comments in this file; no visible instruction uses it
63
64 %define m_out_buf r8 ; write cursor in the level_buf ICF output buffer
65
66 %define dist r10 ; match distance, (f_i - f_index) & 0xFFFF
67 ; r12 is shared: f_end_i is spilled to the stack because code2 overwrites it
68 %define code2 r12 ; word built for a match: get_dist_icf_code result ORed with the length symbol
69 %define f_end_i r12 ; input index limit for the loops
70
71 %define file_start r13 ; next_in - total_in
72
73 %define len r14 ; match length; written by the compare macro invocation and read afterwards
74
75 %define hufftables r15 ; loaded from stream + _hufftables
76
77 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
78 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
79 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
80 f_end_i_mem_offset equ 0 ; local variable (8 bytes): spilled f_end_i, because r12 is also used as code2
81 m_out_end equ 8 ; stack slot: output limit that m_out_buf is compared against
82 m_out_start equ 16 ; stack slot: m_out_buf at entry, used to compute the bytes written at exit
83 stack_size equ 32 ; bytes reserved below the saved registers; the three slots above use offsets 0..23
84 ; void isal_deflate_icf_finish ( isal_zstream *stream )
85 ; arg 1: addr of stream, taken from rcx when assembling for win64 and from rdi otherwise
;
; What the code below does:
;   * Walks the pending input from index total_in up to total_in + avail_in.
;   * While f_i is below (end - LAST_BYTES_COUNT) it hashes 4 input bytes,
;     looks up and replaces the 16-bit head[] entry, and emits either a
;     length/distance word or a literal word through write_dword.
;   * Positions left over after that loop are emitted as literals only.
;   * Every emitted symbol bumps a 16-bit counter in hist_lit_len / hist_dist.
;   * Both loops stop as soon as m_out_buf rises above the output limit.
;   * On exit it writes back total_in, next_in, avail_in and the level_buf
;     icf_buf_next / icf_buf_avail_out fields; internal state is set to
;     ZSTATE_CREATE_HDR on the out_end path.
; PUSH_ALL, POP_ALL, compute_hash, compare, get_dist_icf_code and write_dword
; are macros defined outside this file; notes about them below are assumptions.
86 global isal_deflate_icf_finish_01
87 isal_deflate_icf_finish_01:
88 PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 ; presumably saves the listed registers for POP_ALL to restore - TODO confirm in the macro
89 sub rsp, stack_size
90
91 %ifidn __OUTPUT_FORMAT__, win64
92 mov stream, rcx
93 %else
94 mov stream, rdi ; must precede the load of f_i, which also lives in rdi
95 %endif
96
97 ; set up the output cursor and limit from level_buf (icf_buf_next / icf_buf_avail_out)
98 mov tmp1, [stream + _level_buf]
99 mov m_out_buf, [tmp1 + _icf_buf_next]
100 mov [rsp + m_out_start], m_out_buf ; remembered so the exit path can compute bytes written
101 mov tmp1, [tmp1 + _icf_buf_avail_out]
102 add tmp1, m_out_buf
103 sub tmp1, 4 ; limit = start + avail_out - 4; presumably the last address where a 4-byte write still fits
104
105 mov [rsp + m_out_end], tmp1
106
107 mov hufftables, [stream + _hufftables] ; NOTE(review): not referenced again by the instructions visible in this file
108
109 mov file_start, [stream + _next_in]
110
111 mov f_i %+ d, dword [stream + _total_in] ; 32-bit load, zero-extended into the full register
112 sub file_start, f_i ; file_start = next_in - total_in
113
114 mov f_end_i %+ d, dword [stream + _avail_in]
115 add f_end_i, f_i ; f_end_i = total_in + avail_in, one past the last available input index
116
117 sub f_end_i, LAST_BYTES_COUNT ; main loop limit stops LAST_BYTES_COUNT short of the end (presumably to keep the 4-byte loads in bounds - TODO confirm)
118 mov [rsp + f_end_i_mem_offset], f_end_i
119 ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
120 cmp f_i, f_end_i
121 jge end_loop_2 ; signed compare: f_end_i is negative when total_in + avail_in < LAST_BYTES_COUNT
122
123 mov curr_data %+ d, [file_start + f_i]
124
125 cmp dword [stream + _internal_state_has_hist], 0
126 jne skip_write_first_byte
127 ; no history yet: record this position in head[], set has_hist and emit the byte as a literal without searching
128 cmp m_out_buf, [rsp + m_out_end]
129 ja end_loop_2
130
131 compute_hash hash, curr_data
132 and hash %+ d, HASH_MASK
133 mov [stream + _internal_state_head + 2 * hash], f_i %+ w
134 mov dword [stream + _internal_state_has_hist], 1
135 jmp encode_literal
136
137 skip_write_first_byte:
138
139 loop2:
140 ; stop when the output cursor has passed the limit
141 cmp m_out_buf, [rsp + m_out_end]
142 ja end_loop_2
143
144 ; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
145 mov curr_data %+ d, [file_start + f_i]
146 compute_hash hash, curr_data
147 and hash %+ d, HASH_MASK
148
149 ; f_index = state->head[hash];
150 movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
151
152 ; state->head[hash] = (uint16_t) f_i;
153 mov [stream + _internal_state_head + 2 * hash], f_i %+ w
154
155 ; dist = f_i - f_index; // mod 64k
156 mov dist %+ d, f_i %+ d
157 sub dist %+ d, f_index %+ d
158 and dist %+ d, 0xFFFF
159
160 ; if ((dist-1) <= (D-1)) {   NOTE(review): the jae below also rejects dist-1 == D-1, so the accepted range is dist-1 < D-1
161 mov tmp1 %+ d, dist %+ d
162 sub tmp1 %+ d, 1
163 cmp tmp1 %+ d, (D-1)
164 jae encode_literal ; unsigned compare: dist == 0 wraps to 0xFFFFFFFF and is rejected as well
165
166 ; len = f_end_i - f_i;
167 mov tmp4, [rsp + f_end_i_mem_offset]
168 sub tmp4, f_i
169 add tmp4, LAST_BYTES_COUNT ; add the margin back: tmp4 = bytes from f_i to the true end of input
170
171 ; if (len > 258) len = 258;
172 cmp tmp4, 258
173 cmovg tmp4, [c258] ; cmov takes no immediate operand, so 258 is read from the qword c258
174
175 ; len = compare(state->file_start + f_i,
176 ; state->file_start + f_i - dist, len);
177 lea tmp1, [file_start + f_i]
178 mov tmp2, tmp1
179 sub tmp2, dist
180 compare tmp4, tmp1, tmp2, len, tmp3 ; len is read as the result right below; tmp3 presumably scratch
181
182 ; if (len >= SHORTEST_MATCH) {
183 cmp len, SHORTEST_MATCH
184 jb encode_literal
185
186 ;; encode as dist/len
187
188 ; get_dist_code(dist, &code2, &code_len2);
189 dec dist
190 get_dist_icf_code dist, code2, tmp3 ;; clobbers dist, rcx - NOTE(review): the operand passed here is tmp3 (rdx); confirm the clobber list against the macro
191
192 ;; get_len_code
193 lea code, [len + 254] ; length symbol = len + 254
194
195 or code2, code ; merge the length symbol into the word produced for the distance
196 inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*code]
197
198 ; pseudo-C: for (k = f_i+1, f_i += len-1; k <= f_i; k++) { -- the code adds the full len and hashes at most two positions
199 lea tmp3, [f_i + 1] ; tmp3 <= k
200 add f_i, len ; f_i now indexes the first byte after the match
201 cmp f_i, [rsp + f_end_i_mem_offset]
202 jae skip_hash_update ; no head[] updates when the match ends at or beyond the loop limit
203
204 ; only update hash twice
205
206 ; hash = compute_hash(state->file_start + k) & HASH_MASK;
207 mov tmp6 %+ d, dword [file_start + tmp3]
208 compute_hash hash, tmp6
209 and hash %+ d, HASH_MASK
210 ; state->head[hash] = k;
211 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
212
213 add tmp3, 1
214
215 ; hash = compute_hash(state->file_start + k) & HASH_MASK;
216 mov tmp6 %+ d, dword [file_start + tmp3]
217 compute_hash hash, tmp6
218 and hash %+ d, HASH_MASK
219 ; state->head[hash] = k;
220 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
221
222 skip_hash_update:
223 write_dword code2, m_out_buf ; presumably stores 4 bytes at m_out_buf and advances it - TODO confirm in the macro
224 shr code2, DIST_OFFSET
225 and code2, 0x1F ; keep the 5-bit field at DIST_OFFSET; it indexes the distance histogram
226 inc word [stream + _internal_state_hist_dist + HIST_ELEM_SIZE*code2]
227 ; continue
228 cmp f_i, [rsp + f_end_i_mem_offset]
229 jl loop2
230 jmp end_loop_2
231
232 encode_literal:
233 ; count the byte at file_start[f_i] in the lit/len histogram and emit it tagged with LIT
234 movzx tmp5, byte [file_start + f_i]
235 inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*tmp5]
236 or tmp5, LIT
237 write_dword tmp5, m_out_buf
238 ; continue
239 add f_i, 1
240 cmp f_i, [rsp + f_end_i_mem_offset]
241 jl loop2
242
243 end_loop_2:
244 mov f_end_i, [rsp + f_end_i_mem_offset]
245 add f_end_i, LAST_BYTES_COUNT ; from here on f_end_i (and its stack copy) is the true end of input
246 mov [rsp + f_end_i_mem_offset], f_end_i
247 ; if f_i already reached the end there are no trailing bytes to emit
248 cmp f_i, f_end_i
249 jge input_end
250
251 xor tmp5, tmp5 ; NOTE(review): looks redundant - the movzx in final_bytes overwrites tmp5 before any read
252 final_bytes:
253 cmp m_out_buf, [rsp + m_out_end]
254 ja out_end ; output full while input remains
255
256 movzx tmp5, byte [file_start + f_i]
257 inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*tmp5]
258 or tmp5, LIT
259 write_dword tmp5, m_out_buf
260
261 inc f_i
262 cmp f_i, [rsp + f_end_i_mem_offset]
263 jl final_bytes
264
265 input_end:
266 cmp dword [stream + _end_of_stream], 0
267 jne out_end
268 cmp dword [stream + _flush], _NO_FLUSH
269 jne out_end
270 jmp end ; input consumed, no end_of_stream and no flush requested: leave the state field untouched
271
272 out_end:
273 mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
274 end:
275 ;; Update input buffer
276 mov f_end_i, [rsp + f_end_i_mem_offset] ; reload: r12 may hold code2 at this point
277 mov [stream + _total_in], f_i %+ d
278 add file_start, f_i
279 mov [stream + _next_in], file_start ; next_in = file_start + f_i
280 sub f_end_i, f_i
281 mov [stream + _avail_in], f_end_i %+ d ; avail_in = true end - f_i
282
283 ;; Update output buffer
284 mov tmp1, [stream + _level_buf]
285 mov [tmp1 + _icf_buf_next], m_out_buf
286
287 ; m_out_buf = bytes written = cursor - cursor at entry
288 sub m_out_buf, [rsp + m_out_start]
289
290 ; level_buf icf_buf_avail_out -= bytes written
291 sub [tmp1 + _icf_buf_avail_out], m_out_buf
292
293 add rsp, stack_size
294 POP_ALL
295 ret
296
297 section .data
298 align 8 ; c258 is read with an 8-byte load (cmovg tmp4, [c258]), so align it to its access size
299 c258: dq 258 ; match-length cap used by the cmovg clamp; kept in memory because cmov takes no immediate operand