]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / isa-l / igzip / aarch64 / isal_deflate_icf_finish_hash_hist.S
CommitLineData
f67539c2
TL
1/**********************************************************************
2 Copyright(c) 2019 Arm Corporation All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the
12 distribution.
13 * Neither the name of Arm Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28**********************************************************************/
29 .arch armv8-a+crc
30 .text
31
32#include "lz0a_const_aarch64.h"
33#include "data_struct_aarch64.h"
34#include "huffman_aarch64.h"
35#include "bitbuf2_aarch64.h"
36#include "stdmac_aarch64.h"
37
/*
 * declare_generic_reg name, reg, default
 *
 * Creates three register aliases for general purpose register number reg:
 *   x_<name>  the 64-bit view  (x<reg>)
 *   w_<name>  the 32-bit view  (w<reg>)
 *   <name>    the view selected by default, which is the register
 *             prefix letter (x or w)
 *
 * Example: declare_generic_reg dist, 7, w
 *   defines dist and w_dist as w7, and x_dist as x7.
 */
.macro declare_generic_reg name:req,reg:req,default:req
	x_\name	.req	x\reg
	w_\name	.req	w\reg
	\name	.req	\default\reg
.endm
46
47/*
48void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
49*/
50
/*
 * Constants.
 *
 * All values below are hard-coded byte offsets into C structures. They are
 * not derived from the C headers at build time in this file, so they have
 * to be kept in sync by hand (assumption: the structures are declared in the
 * project headers - verify there when a layout changes).
 */

/* byte offsets of fields within struct isal_zstream */
.equ	offset_next_in,			0
.equ	offset_avail_in,		8
.equ	offset_total_in,		12
.equ	offset_next_out,		16
.equ	offset_avail_out,		24
.equ	offset_total_out,		28
.equ	offset_hufftables,		32
.equ	offset_level,			40
.equ	offset_level_buf_size,		44
.equ	offset_level_buf,		48
.equ	offset_end_of_stream,		56
.equ	offset_flush,			58
.equ	offset_gzip_flag,		60
.equ	offset_hist_bits,		62
/* start of the embedded state; the offset_state_* values that follow are
   relative to the beginning of struct isal_zstream, not to the state */
.equ	offset_state,			64
.equ	offset_state_block_end,		72
.equ	offset_state_state,		84	/* == offset_state + offset_state_of_zstate */
.equ	offset_state_has_hist,		135

/* byte offsets of fields within struct level_buf */
.equ	offset_encode_tables,		0
.equ	offset_hist,			2176
.equ	offset_hist_d_hist,		2176	/* same address as offset_hist */
.equ	offset_hist_ll_hist,		2296
.equ	offset_deflate_hdr_count,	4348
.equ	offset_deflate_hdr_extra_bits,	4352
.equ	offset_deflate_hdr,		4356
.equ	offset_icf_buf_next,		4688
.equ	offset_icf_buf_avail_out,	4696
.equ	offset_icf_buf_start,		4704
.equ	offset_hash8k,			4712
.equ	offset_hash_hist,		4712	/* same address as offset_hash8k */

/* byte offsets of fields within struct isal_zstate (relative to the state) */
.equ	offset_dist_mask,		12
.equ	offset_hash_mask,		16
.equ	offset_state_of_zstate,		20

/* other constants */
.equ	ISAL_LOOK_AHEAD,		288
94
/*
 * Register allocation for isal_deflate_icf_finish_hash_hist_aarch64.
 *
 * Every declare_generic_reg line creates <name>, w_<name> and x_<name>
 * aliases for the given register number. The last argument selects whether
 * the bare <name> is the 64-bit (x) or the 32-bit (w) view.
 */

	/* incoming argument; shares x0 with param0 */
	declare_generic_reg stream, 0, x

	/* x0 - x6: argument registers used when calling the local helpers */
	declare_generic_reg param0, 0, x
	declare_generic_reg param1, 1, x
	declare_generic_reg param2, 2, x
	declare_generic_reg param3, 3, x
	declare_generic_reg param4, 4, x
	declare_generic_reg param5, 5, x
	declare_generic_reg param6, 6, x

	/* 64-bit locals */
	declare_generic_reg stream_saved, 15, x
	declare_generic_reg level_buf, 13, x
	declare_generic_reg start_in, 21, x
	declare_generic_reg start_out, 22, x
	declare_generic_reg state, 23, x
	declare_generic_reg end_out, 12, x
	declare_generic_reg end_in, 11, x
	declare_generic_reg next_in, 8, x
	declare_generic_reg next_out, 10, x
	/* next_out_iter shares x5 with param5 */
	declare_generic_reg next_out_iter, 5, x
	declare_generic_reg file_start, 18, x
	declare_generic_reg last_seen, 14, x

	/* 32-bit locals */
	declare_generic_reg literal_code, 9, w
	declare_generic_reg hash_mask, 19, w
	declare_generic_reg hist_size, 20, w
	declare_generic_reg dist, 7, w
	declare_generic_reg dist_inc, 24, w

	/* scratch registers */
	declare_generic_reg tmp0, 25, x
	declare_generic_reg tmp1, 26, x
	declare_generic_reg tmp2, 27, x
	declare_generic_reg tmp3, 28, x
130
/*
 * write_deflate_icf_constprop - store one 32-bit ICF record whose distance
 * field is the constant 30 and whose extra field is zero.
 *
 * In:    x0 = address of the 32-bit record to write
 *        w1 = value for bits [9:0] (only the low 10 bits are used)
 * Out:   [x0] = w1[9:0] | (30 << 10); bits [31:19] are zero
 * Clobb: w2
 *
 * The whole word is overwritten, so it is assembled in a register and
 * written with a single store instead of being patched in memory with
 * three load/modify/store round trips. The stored bytes are the same as
 * those produced by the previous sequence for little-endian data accesses.
 *
 * NOTE(review): 30 is presumably the distance symbol used for literal
 * records - confirm against the huffman header.
 */
	.align	2
	.type	write_deflate_icf_constprop, %function
write_deflate_icf_constprop:
	mov	w2, 30720		// 30 << 10: constant for bits [18:10]
	bfi	w2, w1, 0, 10		// bits [9:0] = w1[9:0]
	str	w2, [x0]
	ret
	.size	write_deflate_icf_constprop, .-write_deflate_icf_constprop
145
/*
 * write_deflate_icf - pack three fields into one 32-bit ICF record.
 *
 * In:    x0 = address of the 32-bit record to write
 *        w1 = value for bits [9:0]    (low 10 bits used)
 *        w2 = value for bits [18:10]  (low 9 bits used)
 *        w3 = value for bits [31:19]  (low 13 bits used)
 * Out:   [x0] = w1[9:0] | (w2[8:0] << 10) | (w3[12:0] << 19)
 * Clobb: w1
 *
 * The three fields cover all 32 bits, so the previous contents of the
 * record never survive. The word is therefore assembled in a register and
 * written with a single store instead of three dependent load/modify/store
 * round trips through memory. The stored bytes are the same as those
 * produced by the previous sequence for little-endian data accesses.
 */
	.align	2
	.type	write_deflate_icf, %function
write_deflate_icf:
	and	w1, w1, 0x3ff		// keep bits [9:0], clear the rest
	bfi	w1, w2, 10, 9		// bits [18:10] = w2[8:0]
	bfi	w1, w3, 19, 13		// bits [31:19] = w3[12:0]
	str	w1, [x0]
	ret
	.size	write_deflate_icf, .-write_deflate_icf
160
/*
 * update_state - write the input and output cursors back into the stream
 * and its level buffer.
 *
 * In:    x0 = stream
 *        x1 = start_in   (input position when processing started)
 *        x2 = next_in    (current input position)
 *        x3 = end_in     (one past the last input byte)
 *        x4 = not read; it is overwritten with stream->level_buf
 *        x5 = next_out   (current ICF output position)
 *        x6 = end_out    (end of the ICF output area)
 * Out:   nothing in registers; the stream and level buffer fields below
 *        are updated
 * Clobb: w1, x3, x4, x6, x7, flags
 *
 * The struct offsets are the named constants defined near the top of this
 * file, so that this helper cannot drift from the offsets used by the main
 * routine.
 */
	.align	2
	.type	update_state, %function
update_state:
	sub	x7, x2, x1				// x7 = next_in - start_in
	ldr	x4, [x0, offset_level_buf]
	cmp	x7, 0
	ble	.Lupdate_state_no_hist			// signed: nothing consumed
	mov	w1, 1					// NOTE(review): presumably IGZIP_HIST - confirm
	strb	w1, [x0, offset_state_has_hist]
.Lupdate_state_no_hist:
	ldr	w1, [x0, offset_total_in]
	sub	x6, x6, x5				// x6 = end_out - next_out (bytes)
	str	x2, [x0, offset_next_in]
	sub	x3, x3, x2				// x3 = end_in - next_in
	add	w1, w1, w7				// total_in += bytes consumed
	stp	w3, w1, [x0, offset_avail_in]		// avail_in (8) and total_in (12) are adjacent
	str	w1, [x0, offset_state_block_end]	// block_end = new total_in
	asr	x6, x6, 2				// bytes -> number of 4-byte records
	str	x5, [x4, offset_icf_buf_next]
	str	x6, [x4, offset_icf_buf_avail_out]
	ret
	.size	update_state, .-update_state
183
/*
 * void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
 *
 * Consumes the remaining input of the stream and appends 4-byte ICF records
 * to the level buffer, updating the literal/length and distance histograms
 * as it goes.
 *
 * In:    x0 = stream
 * Out:   no return value. The stream and level buffer fields are updated by
 *        a tail call to update_state, and the state word is set to 2 when
 *        the output area is full or when the input is finished and either
 *        end_of_stream or flush is non-zero.
 * Stack: 96-byte frame holding x29, x30 and x19 - x28.
 *
 * Flow:
 *   1. first loop: while at least 4 input bytes remain, hash the next
 *      4 bytes, look the hash up in last_seen and emit either a match
 *      (length of 4 or more, distance within hist_size) or a literal.
 *   2. second loop: emit the remaining bytes as literals.
 *   3. restore the callee-saved registers and tail call update_state.
 *
 * NOTE(review): the value 2 written to the state word is presumably
 * ZSTATE_CREATE_HDR - confirm against the C headers.
 * NOTE(review): file_start lives in x18, which AAPCS64 reserves as the
 * platform register; that is only safe on platforms that leave x18 free.
 */
	.align	2
	.global	isal_deflate_icf_finish_hash_hist_aarch64
	.type	isal_deflate_icf_finish_hash_hist_aarch64, %function
isal_deflate_icf_finish_hash_hist_aarch64:
	ldr	w_end_in, [stream, offset_avail_in]	// stream->avail_in
	cbz	w_end_in, .stream_not_available

	stp	x29, x30, [sp, -96]!
	add	x29, sp, 0
	stp	x19, x20, [sp, 16]
	stp	x21, x22, [sp, 32]
	stp	x23, x24, [sp, 48]
	stp	x25, x26, [sp, 64]
	stp	x27, x28, [sp, 80]

	mov	stream_saved, stream
	ldr	level_buf, [stream, offset_level_buf]		// 48
	ldr	start_in, [stream, offset_next_in]		// 0
	ldr	start_out, [level_buf, offset_icf_buf_next]	// 4688
	add	state, stream, offset_state			// 64
	ldr	end_out, [level_buf, offset_icf_buf_avail_out]	// 4696
	mov	next_in, start_in
	ldr	w_file_start, [stream, offset_total_in]		// 12
	mov	tmp0, offset_hash_hist				// 4712
	add	last_seen, level_buf, tmp0
	add	end_in, start_in, end_in, uxtw			// end_in = start_in + avail_in
	and	end_out, end_out, -4				// round down to whole 4-byte records
	mov	next_out, start_out
	ldp	hist_size, hash_mask, [state, offset_dist_mask]	// 12: dist_mask, then hash_mask
	sub	file_start, start_in, file_start		// file_start = start_in - total_in
	add	end_out, start_out, end_out
	mov	next_out_iter, next_out

	add	x0, next_in, 3
	cmp	end_in, x0				// skip the first loop unless next_in + 3 < end_in
	bls	.while_first_end

	.p2align 3
.while_first:
	cmp	next_out, end_out
	bcs	.save_and_update_state			// output area is full
	ldr	literal_code, [next_in]			// next 4 input bytes
	mov	w0, literal_code
	crc32cw	w0, wzr, w0				// hash of the 4 bytes
	and	w0, w0, hash_mask
	sub	x2, next_in, file_start			// position relative to file_start
	lsl	x0, x0, 1				// last_seen holds 16-bit entries
	ldrh	dist, [last_seen, x0]
	strh	w2, [last_seen, x0]
	sub	w2, w2, dist
	and	dist, w2, 65535				// dist = (position - last_seen[hash]) & 0xffff
	sub	dist_inc, dist, #1
	cmp	dist_inc, hist_size			// unsigned, so dist == 0 is rejected as well
	bcs	.skip_compare258

	sub	w2, w_end_in, w_next_in			// bytes left; the w write also clears x2[63:32]
	mov	x1, next_in
	sub	x0, next_in, x_dist, uxth		// candidate = next_in - dist

	// macro from the included headers; the result is taken from tmp2
	compare_max_258_bytes param0,param1,param2,tmp2,tmp0,tmp1
	mov	w0, w_tmp2
	and	w2, w0, 65535				// w2 = match length

	cmp	w2, 3
	bhi	.while_first_match_length		// length of 4 or more: emit a match

.skip_compare258:
	and	literal_code, literal_code, 255		// get_lit_icf_code
	add	next_in, next_in, 1
	mov	w1, literal_code
	mov	x0, next_out
	add	x_literal_code, level_buf, x_literal_code, uxtb 2	// level_buf->hist.ll_hist

	ldr	w_tmp0, [x_literal_code, offset_hist_ll_hist]	// 2296
	add	w_tmp0, w_tmp0, 1
	str	w_tmp0, [x_literal_code, offset_hist_ll_hist]	// 2296

	bl	write_deflate_icf_constprop		// write_deflate_icf

	add	next_out, next_out, 4
.while_first_check:
	add	x0, next_in, 3
	mov	next_out_iter, next_out
	cmp	end_in, x0
	bhi	.while_first

.while_first_end:
	cmp	next_in, end_in				// these flags are also used at .while_2nd_end
	bcs	.while_2nd_end

	cmp	next_out, end_out
	bcc	.while_2nd_handle
	b	.save_and_update_state_2nd

	.p2align 2
.while_2nd:
	cmp	end_out, next_out_iter
	bls	.save_and_update_state_2nd		// output area is full

.while_2nd_handle:
	ldrb	w2, [next_in], 1			// next literal byte, post-increment next_in
	mov	x0, next_out_iter
	add	next_out_iter, next_out_iter, 4
	mov	w1, w2
	add	x2, level_buf, x2, uxtb 2

	ldr	w_tmp0, [x2, offset_hist_ll_hist]	// 2296
	add	w_tmp0, w_tmp0, 1
	str	w_tmp0, [x2, offset_hist_ll_hist]	// 2296

	bl	write_deflate_icf_constprop
	cmp	end_in, next_in
	bne	.while_2nd

	// falling out of the loop means next_in == end_in
	b	.end_of_stream_check_and_exit

	.p2align 2
.while_first_match_length:
	and	w0, w0, 65535
	mov	w3, 0					// extra bits stay 0 when dist <= 2
	add	w1, w0, 254				// get_len_icf_code
	cmp	dist, 2
	bhi	.compute_dist_icf_code			// otherwise the code is dist - 1 (dist_inc)

.while_first_match_length_end:
	ubfiz	x_tmp2, x1, 2, 17			// length code * 4
	add	x_tmp1, level_buf, x_dist_inc, uxtw 2	// distance code * 4
	add	x_tmp2, level_buf, x_tmp2

	add	next_in, next_in, x2, uxth		// next_in += match length
	mov	w2, dist_inc

	ldr	w_tmp0, [x_tmp2, offset_hist_ll_hist]	// 2296
	add	w_tmp0, w_tmp0, 1
	str	w_tmp0, [x_tmp2, offset_hist_ll_hist]	// 2296

	mov	x0, next_out
	ldr	w_tmp0, [x_tmp1, offset_hist_d_hist]	// 2176
	add	w_tmp0, w_tmp0, 1
	str	w_tmp0, [x_tmp1, offset_hist_d_hist]	// 2176

	bl	write_deflate_icf
	add	next_out, next_out, 4
	b	.while_first_check

// compute_dist_icf_code
// in:  dist_inc = dist - 1, with dist > 2
// out: dist_inc = distance code, w3 = extra bits
	.p2align 2
.compute_dist_icf_code:
	clz	w3, dist_inc
	mov	w0, 30
	sub	w0, w0, w3				// w0 = number of extra bits

	mov	w3, 1
	lsl	w3, w3, w0
	sub	w3, w3, #1
	and	w3, w3, dist_inc			// w3 = low w0 bits of dist_inc
	lsl	w4, w0, 1
	lsr	dist_inc, dist_inc, w0
	add	dist_inc, dist_inc, w4			// code = (dist_inc >> w0) + 2 * w0
	b	.while_first_match_length_end

.while_2nd_end:
	// flags still come from "cmp next_in, end_in" at .while_first_end
	beq	.end_of_stream_check_and_exit
	mov	param6, end_out
	b	.update_state

.end_of_stream_check_and_exit:
	// 32-bit load covering both end_of_stream (56) and flush (58)
	ldr	w_tmp0, [stream_saved, offset_end_of_stream]	// 56
	cbz	w_tmp0, .update_state_2nd
	b	.save_and_update_state_2nd

	.p2align 3
.save_and_update_state_2nd:
	mov	w_tmp0, 2
	str	w_tmp0, [state, offset_state_of_zstate]	// 20
.update_state_2nd:
	mov	param6, end_out				// param5 already holds next_out_iter
	b	.update_state

	.p2align 2
.save_and_update_state:
	mov	param6, end_out
	mov	param5, next_out
	mov	w_tmp0, 2
	str	w_tmp0, [state, offset_state_of_zstate]	// 20
.update_state:
	// start_out and start_in live in callee-saved registers, so copy them
	// into the argument registers before the frame is torn down
	mov	param4, start_out
	mov	param1, start_in
	mov	param3, end_in
	mov	param2, next_in
	mov	param0, stream_saved

	ldp	x19, x20, [sp, 16]
	ldp	x21, x22, [sp, 32]
	ldp	x23, x24, [sp, 48]
	ldp	x25, x26, [sp, 64]
	ldp	x27, x28, [sp, 80]
	ldp	x29, x30, [sp], 96

	b	update_state				// tail call; update_state returns to our caller

	.p2align 2
.stream_not_available:
	// no input: 32-bit load covering both end_of_stream (56) and flush (58)
	ldr	w1, [stream, offset_end_of_stream]	// 56
	cbz	w1, .done

	mov	w1, 2
	str	w1, [stream, offset_state_state]	// 84
.done:
	ret

	.size	isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64