]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | /********************************************************************** |
2 | Copyright(c) 2019 Arm Corporation All rights reserved. | |
3 | ||
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | * Redistributions of source code must retain the above copyright | |
8 | notice, this list of conditions and the following disclaimer. | |
9 | * Redistributions in binary form must reproduce the above copyright | |
10 | notice, this list of conditions and the following disclaimer in | |
11 | the documentation and/or other materials provided with the | |
12 | distribution. | |
13 | * Neither the name of Arm Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | **********************************************************************/ | |
29 | .arch armv8-a+crc | |
30 | .text | |
31 | ||
32 | #include "lz0a_const_aarch64.h" | |
33 | #include "data_struct_aarch64.h" | |
34 | #include "huffman_aarch64.h" | |
35 | #include "bitbuf2_aarch64.h" | |
36 | #include "stdmac_aarch64.h" | |
37 | ||
/*
 * declare_generic_reg name, reg, default
 *
 * Defines three assembler aliases for general register number \reg:
 *   x_\name  -> x\reg          64-bit view
 *   w_\name  -> w\reg          32-bit view
 *   \name    -> \default\reg   \default is the width prefix the bare name
 *                              should use (every use in this file passes
 *                              either x or w)
 */
        .macro  declare_generic_reg name:req, reg:req, default:req
        x_\name .req    x\reg
        w_\name .req    w\reg
        \name   .req    \default\reg
        .endm
46 | ||
47 | /* | |
48 | void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream); | |
49 | */ | |
50 | ||
/*
 * Constants.
 *
 * Byte offsets of the structure fields this file touches. The values are
 * hand-maintained mirrors of the C structure layouts and must be kept in
 * step with them.
 */

/* Offsets into struct isal_zstream. */
        .set    offset_next_in,                 0
        .set    offset_avail_in,                8
        .set    offset_total_in,                12
        .set    offset_next_out,                16
        .set    offset_avail_out,               24
        .set    offset_total_out,               28
        .set    offset_hufftables,              32
        .set    offset_level,                   40
        .set    offset_level_buf_size,          44
        .set    offset_level_buf,               48
        .set    offset_end_of_stream,           56
        .set    offset_flush,                   58
        .set    offset_gzip_flag,               60
        .set    offset_hist_bits,               62
        .set    offset_state,                   64
        /* Fields of the embedded state, measured from the start of the stream. */
        .set    offset_state_block_end,         offset_state + 8        /* 72 */
        .set    offset_state_state,             offset_state + 20       /* 84 */
        .set    offset_state_has_hist,          offset_state + 71       /* 135 */

/* Offsets into struct level_buf. */
        .set    offset_encode_tables,           0
        .set    offset_hist,                    2176
        .set    offset_hist_d_hist,             offset_hist             /* 2176 */
        .set    offset_hist_ll_hist,            offset_hist + 120       /* 2296 */
        .set    offset_deflate_hdr_count,       4348
        .set    offset_deflate_hdr_extra_bits,  4352
        .set    offset_deflate_hdr,             4356
        .set    offset_icf_buf_next,            4688
        .set    offset_icf_buf_avail_out,       4696
        .set    offset_icf_buf_start,           4704
        .set    offset_hash8k,                  4712
        .set    offset_hash_hist,               offset_hash8k           /* 4712 */

/* Offsets into struct isal_zstate (measured from the start of the state). */
        .set    offset_dist_mask,               12
        .set    offset_hash_mask,               16
        .set    offset_state_of_zstate,         20

/* Other constants. */
        .set    ISAL_LOOK_AHEAD,                288
94 | ||
/*
 * Register aliases used by isal_deflate_icf_finish_hash_hist_aarch64.
 * Each declare_generic_reg line creates <name>, w_<name> and x_<name>.
 */

/* arguments */
        declare_generic_reg     stream,         0,x

/* x0-x6 as seen by the helpers and macros this file calls */
        declare_generic_reg     param0,         0,x
        declare_generic_reg     param1,         1,x
        declare_generic_reg     param2,         2,x
        declare_generic_reg     param3,         3,x
        declare_generic_reg     param4,         4,x
        declare_generic_reg     param5,         5,x
        declare_generic_reg     param6,         6,x

/* local variables */
        declare_generic_reg     stream_saved,   15,x
        declare_generic_reg     level_buf,      13,x
        declare_generic_reg     start_in,       21,x
        declare_generic_reg     start_out,      22,x
        declare_generic_reg     state,          23,x
        declare_generic_reg     end_out,        12,x
        declare_generic_reg     end_in,         11,x
        declare_generic_reg     next_in,        8,x
        declare_generic_reg     next_out,       10,x
        declare_generic_reg     next_out_iter,  5,x     /* shares x5 with param5 */
        /*
         * file_start lives in x28 rather than x18. AAPCS64 makes x18 the
         * platform register, which some platforms reserve, so portable code
         * must not use it as a scratch register. x28 is already saved and
         * restored by the function's prologue/epilogue (stp/ldp x27, x28);
         * it was previously bound to an alias (tmp3) that nothing in this
         * file referenced, so that alias has been removed.
         */
        declare_generic_reg     file_start,     28,x
        declare_generic_reg     last_seen,      14,x

        declare_generic_reg     literal_code,   9,w
        declare_generic_reg     hash_mask,      19,w
        declare_generic_reg     hist_size,      20,w
        declare_generic_reg     dist,           7,w
        declare_generic_reg     dist_inc,       24,w

/* scratch */
        declare_generic_reg     tmp0,           25,x
        declare_generic_reg     tmp1,           26,x
        declare_generic_reg     tmp2,           27,x
130 | ||
/*
 * write_deflate_icf_constprop
 *
 * Specialisation of write_deflate_icf with the second field fixed at 30
 * and the third field fixed at 0. Stores one 32-bit record:
 *
 *     bits  0..9   low 10 bits of w1
 *     bits 10..18  30
 *     bits 19..31  0
 *
 * In:    x0 = address of the 32-bit record to write
 *        w1 = first-field value (only the low 10 bits are used)
 * Out:   nothing; x0 is preserved
 * Clobb: w1
 *
 * The record is assembled in a register and written with a single store.
 * The previous contents of the record never influence the result, so there
 * is no need to read it back; doing so after a halfword store also forces
 * a narrow-store/wide-load dependency.
 */
        .align  2
        .type   write_deflate_icf_constprop, %function
write_deflate_icf_constprop:
        and     w1, w1, #0x3ff                  // keep bits 0..9
        orr     w1, w1, #0x7800                 // 30 << 10: second field = 30
        str     w1, [x0]
        ret
        .size   write_deflate_icf_constprop, .-write_deflate_icf_constprop
145 | ||
/*
 * write_deflate_icf
 *
 * Packs three fields into one 32-bit record and stores it at [x0]:
 *
 *     bits  0..9   low 10 bits of w1
 *     bits 10..18  low  9 bits of w2
 *     bits 19..31  low 13 bits of w3
 *
 * In:    x0 = address of the 32-bit record to write
 *        w1, w2, w3 = field values (excess high bits are ignored)
 * Out:   nothing; x0, w2 and w3 are preserved
 * Clobb: w1
 *
 * The record is assembled in a register and written with a single store.
 * Every bit of the record is overwritten, so its previous contents are
 * never read.
 */
        .align  2
        .type   write_deflate_icf, %function
write_deflate_icf:
        and     w1, w1, #0x3ff                  // bits 0..9, everything above cleared
        bfi     w1, w2, 10, 9                   // bits 10..18
        bfi     w1, w3, 19, 13                  // bits 19..31
        str     w1, [x0]
        ret
        .size   write_deflate_icf, .-write_deflate_icf
160 | ||
/*
 * update_state
 *
 * Writes the input/output cursors back into the stream and its level
 * buffer. Argument meanings follow the register set-up done by the caller
 * in this file.
 *
 * In:    x0 = stream
 *        x1 = start_in   input pointer when processing began
 *        x2 = next_in    current input pointer
 *        x3 = end_in     end of input
 *        x4 = (ignored; overwritten with the level_buf pointer)
 *        x5 = next_out   current output pointer
 *        x6 = end_out    end of output
 * Out:   nothing
 * Clobb: x1, x3, x4, x6, x7, flags
 *
 * Effects:
 *     if (next_in - start_in > 0)  byte at stream + offset_state_has_hist = 1
 *     stream->next_in    = next_in
 *     stream->avail_in   = (u32)(end_in - next_in)
 *     stream->total_in  += (u32)(next_in - start_in)
 *     word at stream + offset_state_block_end = new total_in
 *     level_buf->icf_buf_next      = next_out
 *     level_buf->icf_buf_avail_out = (end_out - next_out) >> 2
 *
 * NOTE(review): icf_buf_avail_out is stored here as the byte distance
 * shifted right by two, whereas the caller in this file reads the same
 * field, masks it with -4 and adds it to a byte pointer. Confirm which
 * unit the rest of the code base expects.
 */
        .align  2
        .type   update_state, %function
update_state:
        sub     x7, x2, x1                              // bytes consumed
        ldr     x4, [x0, offset_level_buf]
        cmp     x7, 0
        ble     .Lupdate_state_no_progress              // signed: skip unless > 0
        mov     w1, 1
        strb    w1, [x0, offset_state_has_hist]
.Lupdate_state_no_progress:
        ldr     w1, [x0, offset_total_in]
        sub     x6, x6, x5                              // end_out - next_out (bytes)
        str     x2, [x0, offset_next_in]
        sub     x3, x3, x2                              // end_in - next_in
        add     w1, w1, w7                              // total_in += bytes consumed
        stp     w3, w1, [x0, offset_avail_in]           // avail_in, then total_in at +4
        str     w1, [x0, offset_state_block_end]
        asr     x6, x6, 2
        str     x5, [x4, offset_icf_buf_next]
        str     x6, [x4, offset_icf_buf_avail_out]
        ret
        .size   update_state, .-update_state
183 | ||
/*
 * void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
 *
 * Consumes the remaining stream input, emitting one 32-bit record per
 * literal or match into the level buffer's output area and updating the
 * histograms at offset_hist_ll_hist / offset_hist_d_hist.
 *
 * In:    x0 = stream
 * Out:   nothing (state is written back through update_state)
 * Stack: 96-byte frame holding x29/x30 and x19-x28; not created on the
 *        avail_in == 0 early exit.
 *
 * Register roles (see the alias declarations earlier in this file):
 *     start_in / next_in / end_in     input window and cursor
 *     start_out / next_out / end_out  output window and cursor
 *     next_out_iter                   output cursor of the second loop; it is
 *                                     also param5 of the final update_state call
 *     file_start                      start_in - total_in, so next_in - file_start
 *                                     is the position within the whole input
 *     last_seen                       level_buf + offset_hash_hist, a table of
 *                                     16-bit positions indexed by hash
 *     hist_size, hash_mask            pair of words at state + offset_dist_mask
 *
 * Flow:
 *   1. While more than 3 input bytes remain: hash the next 4 bytes (CRC32C,
 *      masked by hash_mask), swap the current position into the table and
 *      take dist = (position - previous entry) & 0xffff. If dist - 1 is below
 *      hist_size (unsigned), compare_max_258_bytes is run against the data
 *      dist bytes back; a result above 3 emits a match record, anything else
 *      emits a literal record.
 *   2. The remaining bytes are emitted as literal records.
 *   3. The state word is set to 2 when the output area fills up, or when all
 *      input was consumed and the 32-bit word at offset_end_of_stream is
 *      non-zero. Control then tail-calls update_state.
 *
 * The 32-bit loads at offset_end_of_stream also cover the 16-bit field at
 * offset_flush (56 and 58), so the tests fire if either field is non-zero.
 * NOTE(review): presumably intentional (end_of_stream || flush); confirm
 * against the C reference.
 *
 * compare_max_258_bytes comes from an included header. From its use here it
 * takes two pointers and a byte limit in param0-2, may clobber tmp0/tmp1, and
 * leaves the match length in tmp2 - TODO confirm against its definition.
 */
        .align  2
        .global isal_deflate_icf_finish_hash_hist_aarch64
        .type   isal_deflate_icf_finish_hash_hist_aarch64, %function
isal_deflate_icf_finish_hash_hist_aarch64:
        ldr     w_end_in, [stream, offset_avail_in]     // 32-bit load clears the top half of end_in
        cbz     w_end_in, .stream_not_available

        stp     x29, x30, [sp, -96]!
        mov     x29, sp
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x23, x24, [sp, 48]
        stp     x25, x26, [sp, 64]
        stp     x27, x28, [sp, 80]

        mov     stream_saved, stream
        ldr     level_buf, [stream, offset_level_buf]
        ldr     start_in, [stream, offset_next_in]
        ldr     start_out, [level_buf, offset_icf_buf_next]
        add     state, stream, offset_state
        ldr     end_out, [level_buf, offset_icf_buf_avail_out]
        mov     next_in, start_in
        ldr     w_file_start, [stream, offset_total_in]
        mov     tmp0, offset_hash_hist                  // too large for an add immediate
        add     last_seen, level_buf, tmp0
        add     end_in, start_in, end_in, uxtw          // end_in = start_in + avail_in
        and     end_out, end_out, -4                    // round down to whole 4-byte records
        mov     next_out, start_out
        ldp     hist_size, hash_mask, [state, offset_dist_mask]
        sub     file_start, start_in, file_start        // file_start = start_in - total_in
        add     end_out, start_out, end_out
        mov     next_out_iter, next_out

        add     x0, next_in, 3
        cmp     end_in, x0                              // skip the first loop if end_in <= next_in + 3
        bls     .while_first_end

        /* ---- first loop: at least 4 input bytes available ---- */
        .p2align 3
.while_first:
        cmp     next_out, end_out
        bcs     .save_and_update_state                  // output area full
        ldr     literal_code, [next_in]                 // next 4 input bytes
        mov     w0, literal_code
        crc32cw w0, wzr, w0
        and     w0, w0, hash_mask
        sub     x2, next_in, file_start                 // current position
        lsl     x0, x0, 1                               // table entries are 2 bytes
        ldrh    dist, [last_seen, x0]                   // previous position with this hash
        strh    w2, [last_seen, x0]                     // record the current one
        sub     w2, w2, dist
        and     dist, w2, 65535
        sub     dist_inc, dist, #1
        cmp     dist_inc, hist_size
        bcs     .skip_compare258                        // dist == 0 or out of range

        sub     w2, w_end_in, w_next_in                 // bytes left; w write zero-extends into x2
        mov     x1, next_in
        sub     x0, next_in, x_dist, uxth               // candidate = next_in - dist

        compare_max_258_bytes param0,param1,param2,tmp2,tmp0,tmp1
        mov     w0, w_tmp2
        and     w2, w0, 65535                           // w2 = match length

        cmp     w2, 3
        bhi     .while_first_match_length               // only lengths above 3 become matches

.skip_compare258:
        and     literal_code, literal_code, 255         // first byte is the literal
        add     next_in, next_in, 1
        mov     w1, literal_code
        mov     x0, next_out
        add     x_literal_code, level_buf, x_literal_code, uxtb 2

        ldr     w_tmp0, [x_literal_code, offset_hist_ll_hist]
        add     w_tmp0, w_tmp0, 1                       // ll_hist[literal]++
        str     w_tmp0, [x_literal_code, offset_hist_ll_hist]

        bl      write_deflate_icf_constprop

        add     next_out, next_out, 4
.while_first_check:
        add     x0, next_in, 3
        mov     next_out_iter, next_out
        cmp     end_in, x0
        bhi     .while_first

.while_first_end:
        cmp     next_in, end_in                         // flags are reused at .while_2nd_end
        bcs     .while_2nd_end

        cmp     next_out, end_out
        bcc     .while_2nd_handle
        b       .save_and_update_state_2nd

        /* ---- second loop: trailing bytes, literals only ---- */
        .p2align 2
.while_2nd:
        cmp     end_out, next_out_iter
        bls     .save_and_update_state_2nd              // output area full

.while_2nd_handle:
        ldrb    w2, [next_in], 1
        mov     x0, next_out_iter
        add     next_out_iter, next_out_iter, 4
        mov     w1, w2
        add     x2, level_buf, x2, uxtb 2

        ldr     w_tmp0, [x2, offset_hist_ll_hist]
        add     w_tmp0, w_tmp0, 1                       // ll_hist[literal]++
        str     w_tmp0, [x2, offset_hist_ll_hist]

        bl      write_deflate_icf_constprop
        cmp     end_in, next_in
        bne     .while_2nd

        b       .end_of_stream_check_and_exit           // next_in == end_in here

        /* ---- match found in the first loop ---- */
        .p2align 2
.while_first_match_length:
        and     w0, w0, 65535
        mov     w3, 0                                   // extra bits default to 0
        add     w1, w0, 254                             // first field = match length + 254
        cmp     dist, 2
        bhi     .compute_dist_icf_code                  // dist <= 2 uses dist - 1 directly

.while_first_match_length_end:
        ubfiz   x_tmp2, x1, 2, 17                       // byte offset of ll_hist[first field]
        add     x_tmp1, level_buf, x_dist_inc, uxtw 2   // &level_buf[4 * distance code]
        add     x_tmp2, level_buf, x_tmp2

        add     next_in, next_in, x2, uxth              // skip the matched bytes
        mov     w2, dist_inc

        ldr     w_tmp0, [x_tmp2, offset_hist_ll_hist]
        add     w_tmp0, w_tmp0, 1
        str     w_tmp0, [x_tmp2, offset_hist_ll_hist]

        mov     x0, next_out
        ldr     w_tmp0, [x_tmp1, offset_hist_d_hist]
        add     w_tmp0, w_tmp0, 1
        str     w_tmp0, [x_tmp1, offset_hist_d_hist]

        bl      write_deflate_icf
        add     next_out, next_out, 4
        b       .while_first_check

        /*
         * Distance code for dist > 2, with d = dist - 1 and n = 30 - clz(d):
         *     extra bits (w3) = d & ((1 << n) - 1)
         *     code (dist_inc) = (d >> n) + 2 * n
         */
        .p2align 2
.compute_dist_icf_code:
        clz     w3, dist_inc
        mov     w0, 30
        sub     w0, w0, w3                              // n

        mov     w3, 1
        lsl     w3, w3, w0
        sub     w3, w3, #1
        and     w3, w3, dist_inc
        lsl     w4, w0, 1
        lsr     dist_inc, dist_inc, w0
        add     dist_inc, dist_inc, w4
        b       .while_first_match_length_end

        /* ---- exits ---- */
.while_2nd_end:
        beq     .end_of_stream_check_and_exit           // flags from cmp next_in, end_in
        mov     param6, end_out
        b       .update_state

.end_of_stream_check_and_exit:
        ldr     w_tmp0, [stream_saved, offset_end_of_stream]
        cbz     w_tmp0, .update_state_2nd
        /* fall through */
.save_and_update_state_2nd:
        mov     w_tmp0, 2
        str     w_tmp0, [state, offset_state_of_zstate]
.update_state_2nd:
        mov     param6, end_out                         // param5 already holds next_out_iter
        b       .update_state

        .p2align 2
.save_and_update_state:
        mov     param6, end_out
        mov     param5, next_out
        mov     w_tmp0, 2
        str     w_tmp0, [state, offset_state_of_zstate]
.update_state:
        /* Load the arguments before the callee-saved registers are restored. */
        mov     param4, start_out
        mov     param1, start_in
        mov     param3, end_in
        mov     param2, next_in
        mov     param0, stream_saved

        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x23, x24, [sp, 48]
        ldp     x25, x26, [sp, 64]
        ldp     x27, x28, [sp, 80]
        ldp     x29, x30, [sp], 96

        b       update_state                            // tail call; returns to our caller

        /* ---- avail_in == 0: no frame was set up ---- */
        .p2align 2
.stream_not_available:
        ldr     w1, [stream, offset_end_of_stream]
        cbz     w1, .done

        mov     w1, 2
        str     w1, [stream, offset_state_state]
.done:
        ret

        .size   isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64