]> git.proxmox.com Git - ceph.git/blob - ceph/src/isa-l/igzip/igzip_icf_finish.asm
bump version to 18.2.4-pve3
[ceph.git] / ceph / src / isa-l / igzip / igzip_icf_finish.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %include "options.asm"
31 %include "lz0a_const.asm"
32 %include "data_struct2.asm"
33 %include "bitbuf2.asm"
34 %include "huffman.asm"
35 %include "igzip_compare_types.asm"
36
37 %include "stdmac.asm"
38 %include "reg_sizes.asm"
39
40 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43
; Symbolic register names used by the routine below.
; Several names are bound to the same physical register, so two names that
; share a register can never hold live values at the same time.
; rax: freshly loaded input bytes / general scratch
44 %define curr_data rax
45 %define tmp1 rax
46
; rbx: hash-table entry, length code, and scratch (five names, one register)
47 %define f_index rbx
48 %define code rbx
49 %define tmp4 rbx
50 %define tmp5 rbx
51 %define tmp6 rbx
52
; rcx: scratch / hash value
53 %define tmp2 rcx
54 %define hash rcx
55
; rdx: scratch
56 %define tmp3 rdx
57
; rsi: pointer to the stream structure (copied from the argument register)
58 %define stream rsi
59
; rdi: current input index
60 %define f_i rdi
61
; rbp: only hmask1 is used by the code in this file; code_len2 is not referenced here
62 %define code_len2 rbp
63 %define hmask1 rbp
64
; r8: output write cursor
65 %define m_out_buf r8
66
; r9: base address loaded from [stream + _level_buf]
67 %define level_buf r9
68
; r10: match distance, later reused for the hash mask on the match path
69 %define dist r10
70 %define hmask2 r10
71
; r12: end-of-input index, reused for the assembled match code
72 %define code2 r12
73 %define f_end_i r12
74
; r13: base such that [file_start + f_i] addresses input byte f_i
75 %define file_start r13
76
; r14: match length produced by the compare macro
77 %define len r14
78
; r15: loaded from [stream + _hufftables]
79 %define hufftables r15
80
; Address expressions (base register + field offset), meant to be used inside [ ]
81 %define hash_table level_buf + _hash8k_hash_table
82 %define lit_len_hist level_buf + _hist_lit_len
83 %define dist_hist level_buf + _hist_dist
84
85 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
86 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
87 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Offsets of the local slots the routine keeps at [rsp + offset]; each slot is 8 bytes.
88 f_end_i_mem_offset equ 0 ; saved f_end_i (end index; held reduced by LAST_BYTES_COUNT during the main loop)
89 m_out_end equ 8 ; highest m_out_buf value at which another write is still allowed
90 m_out_start equ 16 ; m_out_buf value on entry, used to compute bytes written
91 dist_mask_offset equ 24 ; copy of the dword at [stream + _internal_state_dist_mask]
92 hash_mask_offset equ 32 ; copy of the dword at [stream + _internal_state_hash_mask]
93 stack_size equ 5*8 ; five 8-byte slots = 40 bytes reserved with sub rsp
94
; METHOD is pasted into the exported symbol name with %+ below.
95 %xdefine METHOD hash_hist
96
97 [bits 64]
; default rel: memory references without a base register (e.g. [c258]) are RIP-relative
98 default rel
99 section .text
100
101 ; void isal_deflate_icf_finish ( isal_zstream *stream )
; Exported under the name isal_deflate_icf_finish_ %+ METHOD %+ _01.
;
; Outline of the code below:
;  * setup: loads the output cursor and its limit from the level buffer,
;    copies the distance and hash masks to the stack, and derives
;    file_start / f_i / f_end_i from next_in / total_in / avail_in;
;  * .loop2: at each visited position below f_end_i - LAST_BYTES_COUNT,
;    reads and updates the hash table, then emits either a literal or a
;    length/distance code through write_dword and increments the matching
;    histogram counter;
;  * .final_bytes: emits the remaining input bytes as literals;
;  * .input_end/.out_end/.end: conditionally sets the state to
;    ZSTATE_CREATE_HDR, then writes the input/output bookkeeping back.
; Both loops stop as soon as m_out_buf is above the saved output limit.
; PUSH_ALL, POP_ALL, endbranch, compute_hash, compare, get_dist_icf_code and
; write_dword are macros from the %include'd files and are not visible here;
; remarks about their effects are assumptions to confirm against those files.
102 ; arg 1: addr of stream -- read from rcx when assembling for win64, from rdi otherwise
103 global isal_deflate_icf_finish_ %+ METHOD %+ _01
104 isal_deflate_icf_finish_ %+ METHOD %+ _01:
105 endbranch
106 PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 ; presumably saves these registers; POP_ALL before ret is the counterpart
107 sub rsp, stack_size ; reserve the local slots addressed as [rsp + offset]
108
109 %ifidn __OUTPUT_FORMAT__, win64
110 mov stream, rcx
111 %else
112 mov stream, rdi
113 %endif
114
115 ; Output setup (pseudo-C of the original comment: state->bitbuf.set_buf(stream->next_out, stream->avail_out);)
116 mov tmp2 %+ d, dword [stream + _internal_state_dist_mask]
117 mov tmp3 %+ d, dword [stream + _internal_state_hash_mask]
118 mov level_buf, [stream + _level_buf]
119 mov m_out_buf, [level_buf + _icf_buf_next] ; m_out_buf = current output cursor
120 mov [rsp + m_out_start], m_out_buf ; remembered to compute bytes written at .end
121 mov tmp1, [level_buf + _icf_buf_avail_out]
122 add tmp1, m_out_buf
123 sub tmp1, 4 ; tmp1 = cursor + avail_out - 4; writes are allowed only while m_out_buf <= this
124
125 mov [rsp + dist_mask_offset], tmp2 ; 32-bit loads above zero-extended, so the full qword store is well defined
126 mov [rsp + hash_mask_offset], tmp3
127 mov [rsp + m_out_end], tmp1
128
129 mov hufftables, [stream + _hufftables] ; NOTE(review): hufftables is not referenced again in the visible code
130
131 mov file_start, [stream + _next_in]
132
133 mov f_i %+ d, dword [stream + _total_in] ; 32-bit load, zero-extended into f_i
134 sub file_start, f_i ; file_start = next_in - total_in, so [file_start + f_i] is the current input byte
135
136 mov f_end_i %+ d, dword [stream + _avail_in]
137 add f_end_i, f_i ; f_end_i = total_in + avail_in
138
139 sub f_end_i, LAST_BYTES_COUNT ; main loop stops LAST_BYTES_COUNT short; .end_loop_2 adds it back
140 mov [rsp + f_end_i_mem_offset], f_end_i ; saved because r12 is reused as code2
141 ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
142 cmp f_i, f_end_i
143 jge .end_loop_2 ; signed compare: also skips the loop if the subtraction above went negative
144
145 mov curr_data %+ d, [file_start + f_i] ; 4 input bytes for the first-byte hash below (.loop2 reloads them itself)
146
147 cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
148 jne .skip_write_first_byte ; history already marked: go straight to the main loop
149
150 cmp m_out_buf, [rsp + m_out_end]
151 ja .end_loop_2 ; output limit already exceeded
152
; First position with no history: record its hash, mark history, emit it as a literal.
153 mov hmask1 %+ d, [rsp + hash_mask_offset]
154 compute_hash hash, curr_data
155 and hash %+ d, hmask1 %+ d
156 mov [hash_table + 2 * hash], f_i %+ w ; table entries are 16-bit: low word of f_i
157 mov byte [stream + _internal_state_has_hist], IGZIP_HIST
158 jmp .encode_literal
159
160 .skip_write_first_byte:
161
162 .loop2:
163 mov tmp3 %+ d, [rsp + dist_mask_offset] ; tmp3 = distance mask, reloaded every iteration (rdx is reused as scratch)
164 mov hmask1 %+ d, [rsp + hash_mask_offset]
165 ; if (state->bitbuf.is_full()) {
166 cmp m_out_buf, [rsp + m_out_end]
167 ja .end_loop_2
168
169 ; hash = compute_hash(state->file_start + f_i) & hash_mask;
170 mov curr_data %+ d, [file_start + f_i]
171 compute_hash hash, curr_data
172 and hash %+ d, hmask1 %+ d
173
174 ; f_index = state->head[hash];
175 movzx f_index %+ d, word [hash_table + 2 * hash]
176
177 ; state->head[hash] = (uint16_t) f_i;
178 mov [hash_table + 2 * hash], f_i %+ w
179
180 ; dist = f_i - f_index; // mod 64k
181 mov dist %+ d, f_i %+ d
182 sub dist %+ d, f_index %+ d
183 and dist %+ d, 0xFFFF
184
185 ; if ((dist-1) <= (D-1)) {  -- as coded: a match is tried only when (dist-1) < dist mask, unsigned
186 mov tmp1 %+ d, dist %+ d
187 sub tmp1 %+ d, 1 ; dist == 0 wraps to 0xFFFFFFFF and is rejected by the jae
188 cmp tmp1 %+ d, tmp3 %+ d
189 jae .encode_literal
190
191 ; len = f_end_i - f_i;  (the saved end is reduced, so LAST_BYTES_COUNT is added back)
192 mov tmp4, [rsp + f_end_i_mem_offset]
193 sub tmp4, f_i
194 add tmp4, LAST_BYTES_COUNT
195
196 ; if (len > 258) len = 258;
197 cmp tmp4, 258
198 cmovg tmp4, [c258] ; cmov has no immediate form, hence the in-memory constant
199
200 ; len = compare(state->file_start + f_i,
201 ; state->file_start + f_i - dist, len);
202 lea tmp1, [file_start + f_i]
203 mov tmp2, tmp1
204 sub tmp2, dist
205 compare tmp4, tmp1, tmp2, len, tmp3 ; result expected in len; tmp3 is presumably scratch (macro not shown)
206
207 ; if (len >= SHORTEST_MATCH) {
208 cmp len, SHORTEST_MATCH
209 jb .encode_literal
210
211 ;; encode as dist/len
212
213 ; get_dist_code(dist, &code2, &code_len2);
214 dec dist ; the macro is given dist - 1
215 get_dist_icf_code dist, code2, tmp3 ;; clobbers dist, rcx
216
217 ;; get_len_code
218 lea code, [len + 254] ; code = len + 254, also the lit_len_hist index used below
219
220 mov hmask2 %+ d, [rsp + hash_mask_offset] ; hmask2 shares r10 with dist, which is not read again in this iteration
221
222 or code2, code ; merge the length code into the distance code
223 inc dword [lit_len_hist + HIST_ELEM_SIZE*code]
224
225 ; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {  -- note: the code below adds len, not len-1
226 lea tmp3, [f_i + 1] ; tmp3 <= k
227 add f_i, len
228 cmp f_i, [rsp + f_end_i_mem_offset]
229 jae .skip_hash_update ; unsigned compare against the reduced end: skip the two hash insertions
230
231 ; only update hash twice  (positions k and k+1)
232
233 ; hash = compute_hash(state->file_start + k) & hash_mask;
234 mov tmp6 %+ d, dword [file_start + tmp3] ; tmp6 shares rbx with code; code was already merged into code2
235 compute_hash hash, tmp6
236 and hash %+ d, hmask2 %+ d
237 ; state->head[hash] = k;
238 mov [hash_table + 2 * hash], tmp3 %+ w
239
240 add tmp3, 1
241
242 ; hash = compute_hash(state->file_start + k) & hash_mask;
243 mov tmp6 %+ d, dword [file_start + tmp3]
244 compute_hash hash, tmp6
245 and hash %+ d, hmask2 %+ d
246 ; state->head[hash] = k;
247 mov [hash_table + 2 * hash], tmp3 %+ w
248
249 .skip_hash_update:
250 write_dword code2, m_out_buf ; presumably stores the dword at m_out_buf and advances it (macro not shown)
251 shr code2, DIST_OFFSET
252 and code2, 0x1F ; 5-bit field taken from bit DIST_OFFSET upward = dist_hist index
253 inc dword [dist_hist + HIST_ELEM_SIZE*code2]
254 ; continue
255 cmp f_i, [rsp + f_end_i_mem_offset]
256 jl .loop2
257 jmp .end_loop_2
258
259 .encode_literal:
260 ; get_lit_code(state->file_start[f_i], &code2, &code_len2);
261 movzx tmp5, byte [file_start + f_i] ; tmp5 = input byte, zero-extended
262 inc dword [lit_len_hist + HIST_ELEM_SIZE*tmp5] ; count the literal
263 or tmp5, LIT ; combine the byte value with the LIT constant
264 write_dword tmp5, m_out_buf
265 ; continue
266 add f_i, 1
267 cmp f_i, [rsp + f_end_i_mem_offset]
268 jl .loop2
269
270 .end_loop_2:
; Restore the real end index (undo the LAST_BYTES_COUNT reduction) in both r12 and the stack slot.
271 mov f_end_i, [rsp + f_end_i_mem_offset]
272 add f_end_i, LAST_BYTES_COUNT
273 mov [rsp + f_end_i_mem_offset], f_end_i
274 ; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
275 cmp f_i, f_end_i
276 jge .input_end
277
278 xor tmp5, tmp5 ; NOTE(review): tmp5 is overwritten by the movzx below before it is read
279 .final_bytes:
; Emit every remaining input byte as a literal, as long as the output limit allows.
280 cmp m_out_buf, [rsp + m_out_end]
281 ja .out_end
282
283 movzx tmp5, byte [file_start + f_i]
284 inc dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
285 or tmp5, LIT
286 write_dword tmp5, m_out_buf
287
288 inc f_i
289 cmp f_i, [rsp + f_end_i_mem_offset]
290 jl .final_bytes
291
292 .input_end:
; All input consumed: the state is changed only if end_of_stream is non-zero
; or flush differs from _NO_FLUSH; otherwise it is left as it was.
293 cmp word [stream + _end_of_stream], 0
294 jne .out_end
295 cmp word [stream + _flush], _NO_FLUSH
296 jne .out_end
297 jmp .end
298
299 .out_end:
300 mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
301 .end:
302 ;; Update input buffer
303 mov f_end_i, [rsp + f_end_i_mem_offset] ; reload: r12 may have been overwritten as code2
304 mov [stream + _total_in], f_i %+ d
305 mov [stream + _internal_state_block_end], f_i %+ d
306
307 add file_start, f_i ; file_start + f_i = address of the first unconsumed byte
308 mov [stream + _next_in], file_start
309 sub f_end_i, f_i ; bytes of input left
310 mov [stream + _avail_in], f_end_i %+ d
311
312 ;; Update output buffer
313 mov [level_buf + _icf_buf_next], m_out_buf
314
315 ; len = state->bitbuf.buffer_used();  (m_out_buf becomes cursor - start = bytes written)
316 sub m_out_buf, [rsp + m_out_start]
317
318 ; stream->avail_out -= len;  (applied to the field at [level_buf + _icf_buf_avail_out])
319 sub [level_buf + _icf_buf_avail_out], m_out_buf
320
321 add rsp, stack_size
322 POP_ALL
323 ret
324
325 section .data
; Align the qword below on its natural 8-byte boundary (it was only 4-byte
; aligned, which allows the 8-byte load to straddle a cache line).
326 align 8
; c258: 64-bit constant 258, the memory source operand of the
; "cmovg tmp4, [c258]" that caps the match length (cmov has no immediate form).
327 c258: dq 258