; git.proxmox.com Git - ceph.git/blame - ceph/src/isa-l/igzip/igzip_finish.asm
; update sources to v12.1.1
; [ceph.git] / ceph / src / isa-l / igzip / igzip_finish.asm
; CommitLineData
; 7c673cae
; FG
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%include "options.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"

%include "stdmac.asm"
%include "reg_sizes.asm"

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Register aliases used by isal_deflate_finish_01.
;
; Several names deliberately share one physical register, so two aliases
; of the same register are never live at the same time:
;   rax : curr_data, tmp1
;   rbx : f_index, code, tmp4, tmp5, tmp6
;   rcx : tmp2, hash (also named directly as the code length / clobber
;         register in the get_len_code / get_dist_code invocations)
;   r12 : code2, f_end_i -- because code2 overwrites r12 while encoding,
;         the loop bound f_end_i is kept in a stack slot and reloaded.
;
; Sub-register views are spelled "alias %+ d" / "alias %+ w"; the pasted
; names (e.g. raxd) are expected to be mapped to the 32/16-bit registers
; by reg_sizes.asm, which is not visible in this file.

%define curr_data       rax     ; 4 input bytes loaded for hashing
%define tmp1            rax

%define f_index         rbx     ; previous position read from head[hash]
%define code            rbx     ; length code
%define tmp4            rbx
%define tmp5            rbx
%define tmp6            rbx

%define tmp2            rcx
%define hash            rcx     ; index into the head[] table

%define tmp3            rdx

%define stream          rsi     ; pointer to the stream structure

%define f_i             rdi     ; current input index

%define code_len2       rbp     ; bit length of the code held in code2

%define m_out_buf       r8      ; current output pointer

%define m_bits          r9      ; bit-buffer contents

%define dist            r10     ; match distance

%define m_bit_count     r11     ; bit-buffer fill count

%define code2           r12     ; code handed to write_bits
%define f_end_i         r12     ; input end index (see note above)

%define file_start      r13     ; next_in - total_in, so [file_start + f_i] is input byte f_i

%define len             r14     ; match length

%define hufftables      r15     ; pointer loaded from stream->hufftables

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; void isal_deflate_finish ( isal_zstream *stream )
;
; Encodes the input that is still pending in the stream (next_in /
; avail_in) and then terminates the block:
;   1. Main loop: for every position up to LAST_BYTES_COUNT before the end
;      of input, look up the previous position with the same hash in
;      head[]; emit a length/distance code when a long-enough match is
;      found at an acceptable distance, otherwise emit a literal.
;   2. Tail: the last LAST_BYTES_COUNT bytes are emitted as literals only
;      (no 4-byte hash loads are issued for them).
;   3. Emit literal/length symbol 256, set has_eob, and move the state to
;      ZSTATE_TRL when end_of_stream == 1, otherwise to ZSTATE_SYNC_FLUSH.
; Every stage stops early (jumping towards not_end) once the output
; pointer is above m_out_end = next_out + avail_out - SLOP.  On exit the
; stream's next_in/avail_in/total_in, next_out/avail_out/total_out and
; the saved bit-buffer state are updated.
;
; arg 1 (stream): rcx for the win64 output format, rdi for every other
;                 (System V) output format; copied into rsi (stream).
; Stack: one 8-byte local holding f_end_i (r12 is reused as code2).
; Registers: rbx, rsi, rdi, rbp, r12-r15 are saved/restored through
;            PUSH_ALL / POP_ALL (macros defined in an include file).
; The macros compute_hash, compare, get_dist_code, get_len_code,
; get_lit_code and write_bits come from the include files; their exact
; clobbers are as annotated at the call sites.
;
f_end_i_mem_offset      equ     0       ; local variable (8 bytes)
stack_size              equ     8

global isal_deflate_finish_01
isal_deflate_finish_01:
        PUSH_ALL        rbx, rsi, rdi, rbp, r12, r13, r14, r15
        sub     rsp, stack_size

        ; Normalise the first argument into rcx.  Only win64 passes it in
        ; rcx; all other x86-64 output formats use the System V register
        ; rdi (the original test covered elf64 only).
%ifnidn __OUTPUT_FORMAT__, win64
        mov     rcx, rdi
%endif

        mov     stream, rcx

        ; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
        mov     m_out_buf, [stream + _next_out]
        mov     [stream + _internal_state_bitbuf_m_out_start], m_out_buf
        mov     tmp1 %+ d, [stream + _avail_out]
        add     tmp1, m_out_buf
        sub     tmp1, SLOP              ; m_out_end = next_out + avail_out - SLOP
skip_SLOP:                              ; label is not referenced in this file
        mov     [stream + _internal_state_bitbuf_m_out_end], tmp1

        mov     m_bits, [stream + _internal_state_bitbuf_m_bits]
        mov     m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]

        mov     hufftables, [stream + _hufftables]

        ; file_start = next_in - total_in, so that [file_start + f_i]
        ; addresses input byte number f_i.
        mov     file_start, [stream + _next_in]

        mov     f_i %+ d, dword [stream + _total_in]
        sub     file_start, f_i

        ; f_end_i = total_in + avail_in - LAST_BYTES_COUNT
        ; (may be negative for short inputs, hence the signed compares)
        mov     f_end_i %+ d, dword [stream + _avail_in]
        add     f_end_i, f_i

        sub     f_end_i, LAST_BYTES_COUNT
        mov     [rsp + f_end_i_mem_offset], f_end_i
        ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
        cmp     f_i, f_end_i
        jge     end_loop_2

        mov     curr_data %+ d, [file_start + f_i]

        ; With no history yet, the first position cannot match anything:
        ; record its hash and emit it as a literal.
        cmp     dword [stream + _internal_state_has_hist], 0
        jne     skip_write_first_byte

        cmp     m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
        ja      end_loop_2

        compute_hash    hash, curr_data
        and     hash %+ d, HASH_MASK
        mov     [stream + _internal_state_head + 2 * hash], f_i %+ w
        mov     dword [stream + _internal_state_has_hist], 1
        jmp     encode_literal

skip_write_first_byte:

loop2:
        ; if (state->bitbuf.is_full()) {
        cmp     m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
        ja      end_loop_2

        ; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
        mov     curr_data %+ d, [file_start + f_i]
        compute_hash    hash, curr_data
        and     hash %+ d, HASH_MASK

        ; f_index = state->head[hash];
        movzx   f_index %+ d, word [stream + _internal_state_head + 2 * hash]

        ; state->head[hash] = (uint16_t) f_i;
        mov     [stream + _internal_state_head + 2 * hash], f_i %+ w

        ; dist = f_i - f_index; // mod 64k
        mov     dist %+ d, f_i %+ d
        sub     dist %+ d, f_index %+ d
        and     dist %+ d, 0xFFFF

        ; Try a match only if (dist-1) < (D-1), compared unsigned;
        ; dist == 0 wraps to 0xFFFFFFFF and is rejected as well.
        mov     tmp1 %+ d, dist %+ d
        sub     tmp1 %+ d, 1
        cmp     tmp1 %+ d, (D-1)
        jae     encode_literal

        ; len = (true end of input) - f_i
        ;     = f_end_i + LAST_BYTES_COUNT - f_i
        mov     tmp4, [rsp + f_end_i_mem_offset]
        sub     tmp4, f_i
        add     tmp4, LAST_BYTES_COUNT

        ; if (len > 258) len = 258;
        ; (cmov takes no immediate, so 258 comes from memory; "rel" keeps
        ;  the reference RIP-relative regardless of the DEFAULT setting)
        cmp     tmp4, 258
        cmovg   tmp4, [rel c258]

        ; len = compare(state->file_start + f_i,
        ;               state->file_start + f_i - dist, len);
        lea     tmp1, [file_start + f_i]
        mov     tmp2, tmp1
        sub     tmp2, dist
        compare tmp4, tmp1, tmp2, len, tmp3

        ; if (len >= SHORTEST_MATCH) {
        cmp     len, SHORTEST_MATCH
        jb      encode_literal

        ;; encode as dist/len

        ; get_dist_code(dist, &code2, &code_len2);
        dec     dist
        get_dist_code   dist, code2, code_len2, hufftables ;; clobbers dist, rcx

        ; get_len_code(len, &code, &code_len);
        get_len_code    len, code, rcx, hufftables ;; rcx is code_len

        ; code2 <<= code_len
        ; code2 |= code
        ; code_len2 += code_len
        SHLX    code2, code2, rcx
        or      code2, code
        add     code_len2, rcx

        ; k = f_i + 1; f_i += len;
        lea     tmp3, [f_i + 1]         ; tmp3 <= k
        add     f_i, len
        ; Skip the hash inserts when the match ran into the tail region,
        ; where 4-byte loads are not issued.
        cmp     f_i, [rsp + f_end_i_mem_offset]
        jae     skip_hash_update

        ; only update hash twice (positions k and k+1)

        ; hash = compute_hash(state->file_start + k) & HASH_MASK;
        mov     tmp6 %+ d, dword [file_start + tmp3]
        compute_hash    hash, tmp6
        and     hash %+ d, HASH_MASK
        ; state->head[hash] = k;
        mov     [stream + _internal_state_head + 2 * hash], tmp3 %+ w

        add     tmp3, 1

        ; hash = compute_hash(state->file_start + k) & HASH_MASK;
        mov     tmp6 %+ d, dword [file_start + tmp3]
        compute_hash    hash, tmp6
        and     hash %+ d, HASH_MASK
        ; state->head[hash] = k;
        mov     [stream + _internal_state_head + 2 * hash], tmp3 %+ w

skip_hash_update:
        write_bits      m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5

        ; continue
        cmp     f_i, [rsp + f_end_i_mem_offset]
        jl      loop2
        jmp     end_loop_2

encode_literal:
        ; get_lit_code(state->file_start[f_i], &code2, &code_len2);
        movzx   tmp5, byte [file_start + f_i]
        get_lit_code    tmp5, code2, code_len2, hufftables

        write_bits      m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5

        ; continue
        add     f_i, 1
        cmp     f_i, [rsp + f_end_i_mem_offset]
        jl      loop2

end_loop_2:
        ; Restore the true end index (undo the LAST_BYTES_COUNT margin) and
        ; keep it in the stack slot for the tail loop and the epilogue.
        mov     f_end_i, [rsp + f_end_i_mem_offset]
        add     f_end_i, LAST_BYTES_COUNT
        mov     [rsp + f_end_i_mem_offset], f_end_i
        ; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
        cmp     f_i, f_end_i
        jge     write_eob

        xor     tmp5, tmp5
final_bytes:
        ; Emit the remaining tail bytes as literals, one per iteration.
        cmp     m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
        ja      not_end
        movzx   tmp5, byte [file_start + f_i]
        get_lit_code    tmp5, code2, code_len2, hufftables
        write_bits      m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3

        inc     f_i
        cmp     f_i, [rsp + f_end_i_mem_offset]
        jl      final_bytes

write_eob:
        cmp     m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
        ja      not_end

        ; get_lit_code(256, &code2, &code_len2);
        get_lit_code    256, code2, code_len2, hufftables

        write_bits      m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1

        mov     dword [stream + _internal_state_has_eob], 1
        cmp     dword [stream + _end_of_stream], 1
        jne     sync_flush
        ; state->state = ZSTATE_TRL;
        mov     dword [stream + _internal_state_state], ZSTATE_TRL
        jmp     not_end

sync_flush:
        ; state->state = ZSTATE_SYNC_FLUSH;
        mov     dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH
        ; }
not_end:

        ;; Update input buffer
        ; Every path to this point passed through end_loop_2, so the stack
        ; slot holds the true end index; avail_in = end - f_i.
        mov     f_end_i, [rsp + f_end_i_mem_offset]
        mov     [stream + _total_in], f_i %+ d
        add     file_start, f_i
        mov     [stream + _next_in], file_start
        sub     f_end_i, f_i
        mov     [stream + _avail_in], f_end_i %+ d

        ;; Update output buffer
        mov     [stream + _next_out], m_out_buf
        ; len = state->bitbuf.buffer_used();
        sub     m_out_buf, [stream + _internal_state_bitbuf_m_out_start]

        ; stream->avail_out -= len;
        sub     [stream + _avail_out], m_out_buf %+ d
        ; stream->total_out += len;
        add     [stream + _total_out], m_out_buf %+ d

        ; Save the bit-buffer state back into the stream.
        mov     [stream + _internal_state_bitbuf_m_bits], m_bits
        mov     [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
        add     rsp, stack_size
        POP_ALL
        ret

section .data
        align   8                       ; natural alignment for the 8-byte constant below
; 64-bit constant 258, used as the memory operand of the cmovg that caps
; the match length (cmov has no immediate form).
c258:   dq      258