]> git.proxmox.com Git - ceph.git/blob - ceph/src/isa-l/igzip/igzip_finish.asm
69b92815b51d16148d025a8dc2c88d94373da690
[ceph.git] / ceph / src / isa-l / igzip / igzip_finish.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 %include "options.asm"
31 %include "lz0a_const.asm"
32 %include "data_struct2.asm"
33 %include "bitbuf2.asm"
34 %include "huffman.asm"
35 %include "igzip_compare_types.asm"
36
37 %include "stdmac.asm"
38 %include "reg_sizes.asm"
39
40 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43
; Register aliases used by isal_deflate_finish_01.
; Several names share one physical register, so only one alias per register
; can be live at a time:
;   rbx : f_index, code, tmp4, tmp5, tmp6
;   rcx : tmp2, hash (also passed directly as "rcx" for code_len / shift count)
;   r12 : code2, f_end_i (f_end_i is therefore kept in a stack slot in the loop)
44 %define tmp1 rax ; scratch; also carries the preloaded input bytes for compute_hash
45
46 %define f_index rbx ; previous position read from head[hash]
47 %define code rbx ; length code produced by get_len_code
48 %define tmp4 rbx ; remaining-length / max match length passed to compare
49 %define tmp5 rbx ; literal byte and write_bits temporary
50 %define tmp6 rbx ; input bytes hashed while updating head[] after a match
51
52 %define tmp2 rcx ; pointer to the candidate match (file_start + f_i - dist)
53 %define hash rcx ; masked hash used to index head[]
54
55 %define tmp3 rdx ; temporary for compare; loop index k in the hash-update code
56
57 %define stream rsi ; pointer to the stream structure (function argument)
58
59 %define f_i rdi ; current input index, relative to file_start
60
61 %define code_len2 rbp ; bit length of the code held in code2
62
63 %define m_out_buf r8 ; current output pointer (loaded from / stored to next_out)
64
65 %define m_bits r9 ; bit buffer contents
66
67 %define dist r10 ; match distance (f_i - f_index, masked to 16 bits)
68
69 %define m_bit_count r11 ; number of bits held in m_bits
70
71 %define code2 r12 ; code handed to write_bits
72
73 %define f_end_i r12 ; end index; shares r12 with code2, spilled to [rsp + f_end_i_mem_offset]
74
75 %define file_start r13 ; base pointer that f_i / f_end_i are relative to
76
77 %define len r14 ; match length returned by compare
78
79 %define hufftables r15 ; pointer loaded from stream->hufftables
80
81 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
82 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
83 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ----------------------------------------------------------------------
; isal_deflate_finish_01
;
; Walks the internal buffer from b_bytes_processed to b_bytes_valid one
; position at a time. At each position it hashes the input, looks up and
; updates head[hash], and emits either a length/distance code pair or a
; literal code through write_bits. The loop stops when the input is consumed
; or when m_out_buf has passed m_out_end. If all input was consumed and the
; output is not full, it emits literal/length code 256, sets has_eob and
; moves the state to ZSTATE_TRL (end_of_stream == 1) or ZSTATE_SYNC_FLUSH.
; Finally it writes back b_bytes_processed, next_out, avail_out, total_out
; and the bit-buffer state.
;
; In:    rcx = stream (under elf64 the value is first copied from rdi)
; Out:   no value is placed in a return register by the code below
; Stack: stack_size bytes of locals; one slot holds f_end_i because r12 is
;        shared between f_end_i and code2
; Notes: PUSH_ALL, POP_ALL, compute_hash, compare, get_dist_code,
;        get_len_code, get_lit_code and write_bits are macros defined outside
;        this listing; clobbers beyond those noted inline are not visible
;        here. "reg %+ d" / "reg %+ w" presumably select the 32-/16-bit view
;        of the aliased register via names from an include - confirm there.
; ----------------------------------------------------------------------
84 f_end_i_mem_offset equ 0 ; local variable (8 bytes): stack slot that holds f_end_i
85 stack_size equ 8 ; bytes of local stack space reserved by "sub rsp" below
86 ; void isal_deflate_finish ( isal_zstream *stream )   (exported here as isal_deflate_finish_01)
87 ; arg 1: rcx: addr of stream (elf64 builds copy it from rdi into rcx below)
88 global isal_deflate_finish_01
89 isal_deflate_finish_01:
90 PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 ; presumably saves the listed registers; paired with POP_ALL at the end
91 sub rsp, stack_size ; make room for the f_end_i spill slot
92
93 %ifidn __OUTPUT_FORMAT__, elf64
94 mov rcx, rdi ; elf64: first argument is taken from rdi and placed in rcx
95 %endif
96
97 mov stream, rcx
98
99 ; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
100 mov m_out_buf, [stream + _next_out]
101 mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
102 mov tmp1 %+ d, [stream + _avail_out]
103 add tmp1, m_out_buf
104 sub tmp1, SLOP ; m_out_end = next_out + avail_out - SLOP
105 skip_SLOP: ; no jump to this label appears in this listing
106 mov [stream + _internal_state_bitbuf_m_out_end], tmp1
107
108 mov m_bits, [stream + _internal_state_bitbuf_m_bits]
109 mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
110
111 mov hufftables, [stream + _hufftables]
112
113 ; f_i = state->b_bytes_processed;
114 ; f_end_i = state->b_bytes_valid;
115 mov f_i %+ d, [stream + _internal_state_b_bytes_processed]
116 mov f_end_i %+ d, [stream + _internal_state_b_bytes_valid]
117
118 ; f_i += (uint32_t)(state->buffer - state->file_start);
119 ; f_end_i += (uint32_t)(state->buffer - state->file_start);
120 mov file_start, [stream + _internal_state_file_start]
121 lea tmp1, [stream + _internal_state_buffer] ; address of the buffer embedded in the stream structure
122 sub tmp1, file_start ; tmp1 = buffer - file_start
123 add f_i, tmp1
124 add f_end_i, tmp1
125 mov [rsp + f_end_i_mem_offset], f_end_i ; spill: r12 is reused as code2 inside the loop
126 ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
127 cmp f_i, f_end_i
128 jge end_loop_2 ; signed compare: nothing to process
129
130 mov tmp1 %+ d, [file_start + f_i] ; preload the 4 input bytes hashed at the top of loop2
131
132 loop2: ; on every entry tmp1 holds the 4 bytes at file_start + f_i
133 ; if (state->bitbuf.is_full()) {
134 cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
135 ja end_loop_2 ; unsigned: stop once m_out_buf has passed m_out_end
136
137 ; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
138 compute_hash hash, tmp1
139 and hash %+ d, HASH_MASK
140
141 ; f_index = state->head[hash];
142 movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
143
144 ; state->head[hash] = (uint16_t) f_i;
145 mov [stream + _internal_state_head + 2 * hash], f_i %+ w
146
147 ; dist = f_i - f_index; // mod 64k
148 mov dist %+ d, f_i %+ d
149 sub dist %+ d, f_index %+ d
150 and dist %+ d, 0xFFFF
151
152 ; if ((dist-1) <= (D-1)) {
153 mov tmp1 %+ d, dist %+ d
154 sub tmp1 %+ d, 1 ; dist == 0 wraps to 0xFFFFFFFF, which the unsigned test below rejects
155 cmp tmp1 %+ d, (D-1)
156 jae encode_literal ; taken when (dist-1) >= (D-1) unsigned, i.e. the match path needs (dist-1) < (D-1)
157
158 ; len = f_end_i - f_i;
159 mov tmp4, [rsp + f_end_i_mem_offset]
160 sub tmp4, f_i
161
162 ; if (len > 258) len = 258;
163 cmp tmp4, 258
164 cmovg tmp4, [c258] ; signed clamp via the qword constant c258; NOTE(review): no "rel" here - absolute address unless an include sets "default rel"; confirm for PIC builds
165
166 ; len = compare(state->file_start + f_i,
167 ; state->file_start + f_i - dist, len);
168 lea tmp1, [file_start + f_i]
169 mov tmp2, tmp1
170 sub tmp2, dist
171 compare tmp4, tmp1, tmp2, len, tmp3 ; result is read from len below; exact operand roles are defined by the macro (not shown)
172
173 ; if (len >= SHORTEST_MATCH) {
174 cmp len, SHORTEST_MATCH
175 jb encode_literal ; unsigned: match too short, emit a literal instead
176
177 ;; encode as dist/len
178
179 ; get_dist_code(dist, &code2, &code_len2);
180 get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx
181
182 ; get_len_code(len, &code, &code_len);
183 get_len_code len, code, rcx, hufftables ;; rcx is code_len
184
185 ; code2 <<= code_len
186 ; code2 |= code
187 ; code_len2 += code_len
188 %ifdef USE_HSWNI
189 shlx code2, code2, rcx
190 %else
191 shl code2, cl
192 %endif
193 or code2, code ; code2 = (dist code << code_len) | len code
194 add code_len2, rcx ; combined bit length of both codes
195
196 ; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {
197 lea tmp3, [f_i + 1] ; tmp3 <= k
198 add f_i, len ; adds len (not len-1); loop3 below uses k < f_i accordingly
199 %ifdef LIMIT_HASH_UPDATE
200 ; only update hash twice
201
202 ; hash = compute_hash(state->file_start + k) & HASH_MASK;
203 mov tmp6 %+ d, [file_start + tmp3]
204 compute_hash hash, tmp6
205 and hash %+ d, HASH_MASK
206 ; state->head[hash] = k;
207 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
208
209 add tmp3, 1 ; second (and last) position updated in this variant
210
211 ; hash = compute_hash(state->file_start + k) & HASH_MASK;
212 mov tmp6 %+ d, [file_start + tmp3]
213 compute_hash hash, tmp6
214 and hash %+ d, HASH_MASK
215 ; state->head[hash] = k;
216 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
217
218 %else
219 loop3: ; update head[] for every k from old f_i + 1 up to, but not including, the new f_i
220 ; hash = compute_hash(state->file_start + k) & HASH_MASK;
221 mov tmp6 %+ d, [file_start + tmp3]
222 compute_hash hash, tmp6
223 and hash %+ d, HASH_MASK
224 ; state->head[hash] = k;
225 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
226 inc tmp3
227 cmp tmp3, f_i
228 jl loop3 ; signed compare
229 %endif
230
231 mov tmp1 %+ d, [file_start + f_i] ; reload the 4 bytes at the new f_i for the next loop2 pass
232
233 write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5 ; emit the combined len/dist code; tmp5 is handed over as the last operand (presumably scratch)
234
235 ; continue
236 cmp f_i, [rsp + f_end_i_mem_offset]
237 jl loop2 ; signed compare against the spilled f_end_i
238 jmp end_loop_2
239
240 encode_literal:
241 mov tmp1 %+ d, [file_start + f_i + 1] ; preload bytes at f_i + 1; f_i is incremented before looping
242
243 ; get_lit_code(state->file_start[f_i], &code2, &code_len2);
244 movzx tmp5, byte [file_start + f_i]
245 get_lit_code tmp5, code2, code_len2, hufftables
246
247 write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
248
249 ; continue
250 add f_i, 1
251 cmp f_i, [rsp + f_end_i_mem_offset]
252 jl loop2 ; signed compare; falls through to end_loop_2 when the input is consumed
253
254 end_loop_2:
255
256 ; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
257 cmp f_i, [rsp + f_end_i_mem_offset]
258 jl not_end ; input remains: skip the end-of-block handling
259 cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
260 ja not_end ; output is past m_out_end: skip the end-of-block handling
261
262 cmp dword [stream + _end_of_stream], 1
263 jne cont
264 cmp dword [stream + _internal_state_left_over], 0
265 jg not_end ; end_of_stream == 1 but left_over > 0 (signed): do not emit code 256 yet
266
267 cont:
268 ; get_lit_code(256, &code2, &code_len2);
269 get_lit_code 256, code2, code_len2, hufftables ; code for symbol 256; has_eob is set right after it is written
270
271 write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1
272
273 mov dword [stream + _internal_state_has_eob], 1
274 cmp dword [stream + _end_of_stream], 1
275 jne sync_flush
276 ; state->state = ZSTATE_TRL;
277 mov dword [stream + _internal_state_state], ZSTATE_TRL
278 jmp not_end
279
280 sync_flush:
281 ; state->state = ZSTATE_SYNC_FLUSH;
282 mov dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH
283 ; }
284 not_end:
285
286 ; state->b_bytes_processed = f_i - (state->buffer - state->file_start);
287 add f_i, [stream + _internal_state_file_start]
288 sub f_i, stream
289 sub f_i, _internal_state_buffer ; f_i + file_start - (stream + offset of buffer)
290 mov [stream + _internal_state_b_bytes_processed], f_i %+ d
291
292 ; // update output buffer
293 ; stream->next_out = state->bitbuf.buffer_ptr();
294 mov [stream + _next_out], m_out_buf
295 ; len = state->bitbuf.buffer_used();
296 sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start] ; m_out_buf now holds the byte count written in this call
297
298 ; stream->avail_out -= len;
299 sub [stream + _avail_out], m_out_buf %+ d
300 ; stream->total_out += len;
301 add [stream + _total_out], m_out_buf %+ d
302
303 mov [stream + _internal_state_bitbuf_m_bits], m_bits ; save bit-buffer contents for the next call
304 mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
305 add rsp, stack_size ; release the f_end_i spill slot
306 POP_ALL
307 ret
308
; Constant data for isal_deflate_finish_01.
309 section .data
310 align 4 ; NOTE(review): 4-byte alignment precedes a qword (dq); 8 would be its natural alignment, though the cmovg load does not require it
311 c258: dq 258 ; 64-bit constant 258: memory operand of the cmovg that caps the match length