]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / isa-l / igzip / aarch64 / igzip_deflate_body_aarch64.S
1 /**********************************************************************
2 Copyright(c) 2019 Arm Corporation All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the
12 distribution.
13 * Neither the name of Arm Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29 .arch armv8-a+crc
30 .text
31 .align 2
32 #include "lz0a_const_aarch64.h"
33 #include "data_struct_aarch64.h"
34 #include "huffman_aarch64.h"
35 #include "bitbuf2_aarch64.h"
36 #include "stdmac_aarch64.h"
37 /*
38 declare Macros
39 */
40
/*
 * declare_generic_reg name, reg, default
 *
 * Creates three .req aliases for general-purpose register number \reg:
 *   x_<name>  -> x<reg>   (explicit 64-bit view)
 *   w_<name>  -> w<reg>   (explicit 32-bit view)
 *   <name>    -> <default><reg>, where \default is the width prefix
 *                (x or w) used when the bare name is written.
 */
.macro declare_generic_reg name:req,reg:req,default:req
	x_\name		.req	x\reg
	w_\name		.req	w\reg
	\name		.req	\default\reg
.endm
46
/*
 * update_state - write the deflate loop progress back into the stream.
 *
 * Every argument is the base name of a register alias created with
 * declare_generic_reg; the macro only touches the x_<name>/w_<name> views,
 * so it no longer depends on the caller using particular alias names.
 *
 *   stream      - pointer to the stream structure
 *   start_in    - input pointer captured before the loop
 *   next_in     - current input pointer
 *   end_in      - end of the input buffer
 *   m_out_buf   - current output pointer
 *   m_out_start - output pointer captured before the loop
 *   tmp0        - scratch register
 *   tmp1        - unused; kept so existing invocations remain valid
 *
 * Stores performed:
 *   has_hist  = 1                      (only when next_in > start_in)
 *   next_in   = next_in
 *   avail_in  = end_in - next_in
 *   total_in += next_in - start_in
 *   next_out  += m_out_buf - m_out_start
 *   total_out += m_out_buf - m_out_start
 *   avail_out -= m_out_buf - m_out_start
 * The stp/ldp pairs assume the word after _avail_in is total_in and the
 * word after _avail_out is total_out.
 *
 * Clobbers: start_in, next_in, end_in, m_out_buf, m_out_start, tmp0, flags.
 * The skip label is unique per expansion (\@) and assembler-local (.L), so
 * the macro can be expanded more than once in a file.
 */
.macro update_state stream:req,start_in:req,next_in:req,end_in:req,	\
		m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req

	//m_out_buf=bytes_written
	sub	x_\m_out_buf,x_\m_out_buf,x_\m_out_start
	//set has_hist only when some input was consumed
	cmp	x_\next_in,x_\start_in
	bls	.Lskip_has_hist_\@
	mov	w_\tmp0,1
	strb	w_\tmp0,[x_\stream,_internal_state_has_hist]
.Lskip_has_hist_\@:
	ldr	w_\tmp0,[x_\stream,_total_in]
	ldr	x_\m_out_start,[x_\stream,_next_out]	//m_out_start = next_out

	str	x_\next_in,[x_\stream,_next_in]
	sub	x_\start_in,x_\next_in,x_\start_in	//start_in = bytes consumed
	sub	x_\end_in,x_\end_in,x_\next_in		//end_in = bytes remaining
	add	w_\tmp0,w_\tmp0,w_\start_in		//tmp0 = total_in + consumed
	stp	w_\end_in,w_\tmp0,[x_\stream,_avail_in]
	//next_in=avail_out,start_in=total_out
	ldp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
	add	x_\m_out_start,x_\m_out_start,x_\m_out_buf
	str	x_\m_out_start,[x_\stream,_next_out]
	add	w_\start_in,w_\start_in,w_\m_out_buf
	sub	w_\next_in,w_\next_in,w_\m_out_buf
	stp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
.endm
73
74
// isal_deflate_body_aarch64: main deflate loop for AArch64.
// For each input position it hashes the 4 bytes at next_in with CRC32C, looks
// up and updates the last_seen table, and then emits either a length/distance
// code pair or a single literal code through the update_bits macro. Progress
// is written back to the stream by the update_state macro.
//
// In:  x0 = struct isal_zstream *stream
// Out: nothing is returned in a register; results are stored through stream.
//
// NOTE(review): registers 19-28 are callee-saved under AAPCS64. push_stack and
// pop_stack are defined outside this file and are presumably what preserve
// them - confirm.
75 .global isal_deflate_body_aarch64
76 .type isal_deflate_body_aarch64, %function
77 /*
78 void isal_deflate_body_aarch64(struct isal_zstream *stream)
79 */
// Register map. Each declare_generic_reg line creates <name>, w_<name> and
// x_<name> aliases for one register number; the third argument picks the
// width of the bare <name>.
// NOTE(review): the state alias (register 8) is not referenced by name in the
// code visible here.
80 declare_generic_reg stream, 0,x //struct isal_zstream *stream
81 declare_generic_reg state, 8,x //&stream->state
82 declare_generic_reg avail_in, 9,w
83 declare_generic_reg end_of_stream, 10,w //can be used in loop
84
85 declare_generic_reg hash_mask, 11,w
86 declare_generic_reg match_length, 12,w
87 declare_generic_reg hufftables, 13,x
88
// Output bit buffer state.
// NOTE(review): m_bit_count uses register 18, which some platform ABIs reserve
// (x18) - confirm this is acceptable on every supported target.
89 declare_generic_reg m_out_buf, 14,x
90 declare_generic_reg m_out_start, 15,x
91 declare_generic_reg m_out_end, 16,x
92 declare_generic_reg m_bits, 17,x
93 declare_generic_reg m_bit_count, 18,w
94
// Input cursor and loop bound.
95 declare_generic_reg start_in, 19,x
96 declare_generic_reg end_in, 20,x
97 declare_generic_reg next_in, 21,x
98 declare_generic_reg loop_end_cnt, 22,x
99
100 declare_generic_reg literal, 23,w
101 declare_generic_reg hash, 24,w
102 declare_generic_reg dist, 25,w
103
104 declare_generic_reg last_seen, 26,x
105 declare_generic_reg file_start, 27,x
106 declare_generic_reg hist_size, 28,w
107
108 declare_generic_reg tmp0, 5 ,w
109 declare_generic_reg tmp1, 6 ,w
110 declare_generic_reg tmp2, 7 ,w
111
// Shared registers: code_len reuses register 24 (hash) and code2 reuses
// register 10 (end_of_stream). In the code below each second name is only
// written after the last read of the first one on that path.
112 declare_generic_reg code, 3,x
113 declare_generic_reg code_len, 24,x
114 declare_generic_reg code2, 10,x
115 declare_generic_reg code_len2, 4,x
116
117
118 isal_deflate_body_aarch64:
119 //save registers
120 push_stack
// No input available: skip the loop and only handle the state transition.
121 ldr avail_in, [stream, _avail_in]
122 cbz avail_in, exit_save_state
123
124 // set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
// m_out_end = next_out + avail_out - 8, m_out_start = m_out_buf = next_out.
// NOTE(review): the 8-byte margin presumably leaves room for a full 64-bit
// store by update_bits - confirm against that macro.
125 ldr w_m_out_end,[stream,_avail_out]
126 ldr m_out_buf,[stream,_next_out]
127 add m_out_end,m_out_buf,w_m_out_end,uxtw
128 sub m_out_end,m_out_end , 8
129 mov m_out_start,m_out_buf
130 stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
131 str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
// Load the pending bit count and bit accumulator from the bitbuf state.
132 ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
133 ldr m_bits ,[stream,_internal_state_bitbuf_m_bits]
134
135
136 //init variables
137 //last_seen=&stream.internal_state.head = _internal_state+_head
// The offset is added in two steps (65536, then the remainder).
// NOTE(review): presumably because the full offset does not fit a single add
// immediate - confirm.
138 add last_seen,stream,65536
139 add last_seen,last_seen,_internal_state+_head -65536
140
141
142 //start_in=stream->next_in;next_in=start_in
143 ldr start_in,[stream,_next_in]
144 mov next_in,start_in
145 add end_in,start_in,avail_in,uxtw //avail_in reg is free now
// loop_end_cnt = end_in - 289; the main loop runs only while
// next_in <= loop_end_cnt (unsigned).
// The flags set by this cmp are consumed by the bhi main_loop_end further
// down. The ldr/sub instructions in between do not modify flags; keep it
// that way when editing.
146 sub loop_end_cnt,end_in,289 //loop end
147 cmp next_in,loop_end_cnt
148
149
150 //file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
151 ldr w_file_start,[stream,_total_in]
152 sub file_start,next_in,file_start,uxtw
153
154 //uint32_t hist_size = state->dist_mask;
155 ldr hist_size,[stream,_internal_state + _dist_mask]
156
157 //uint32_t hash_mask = state->hash_mask;
158 ldr hash_mask,[stream,_internal_state + _hash_mask]
159
160 ldr hufftables,[stream,_hufftables]
161
// next_in > loop_end_cnt (from the cmp above): skip the loop entirely.
162 bhi main_loop_end
163 main_loop_start:
164 //is_full(&state->bitbuf)
// Output cursor is past m_out_end: stop and save progress.
165 cmp m_out_buf,m_out_end
166 bhi update_state_exit
167
// 32-bit load at next_in; hash = crc32c(0, literal) & hash_mask.
168 ldr literal,[next_in]
169 crc32cw hash,wzr,literal
170 and hash,hash,hash_mask
171
172 ///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
// x_dist first holds next_in - file_start; its low 16 bits are stored as the
// new table entry before the old entry (tmp0) is subtracted.
173 ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash]
174 sub x_dist,next_in,file_start
175 //last_seen[hash] = (uint64_t) (next_in - file_start);
176 strh dist,[last_seen,x_hash,lsl 1]
177 sub dist,dist,w_tmp0
178 and dist,dist,0xffff
179
// Unsigned test (dist - 1) >= hist_size, i.e. dist == 0 or dist > hist_size:
// no usable match candidate, emit a literal.
180 sub w_tmp0,dist,1
181 cmp hist_size,w_tmp0
182 bls get_lit_code
183
184 ///match_length = compare258(next_in - dist, next_in, 258);
185 sub x_tmp2,next_in,x_dist
186 compare_258_bytes tmp2,next_in,match_length,tmp0,tmp1
// Matches of 3 bytes or fewer are not used; emit a literal instead.
187 cmp match_length,3
188 bls get_lit_code
189
// Match accepted: also record positions next_in+1 and next_in+2 in the
// last_seen table. tmp0 tracks (position - file_start); literal and hash are
// overwritten here, which is fine because the literal path is not taken.
190 sub x_tmp0,next_in,file_start
191 ldr literal,[next_in,1]
192 crc32cw hash,wzr,literal
193 and hash,hash,hash_mask
194 add tmp0,tmp0,1
195 strh tmp0,[last_seen,x_hash,lsl 1]
196 //call_print_b hash,dist,last_seen
197
198 ldr literal,[next_in,2]
199 crc32cw hash,wzr,literal
200 and hash,hash,hash_mask
201 add tmp0,tmp0,1
202 strh tmp0,[last_seen,x_hash,lsl 1]
203
204 //get_len_code(stream->hufftables, match_length, &code,
205 // &code_len);
// From here on register 24 holds code_len instead of hash.
206 get_len_code hufftables,match_length,code,code_len,tmp0
207
208 //get_dist_code(stream->hufftables, dist, &code2, &code_len2);
209 get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2
210
211 //code |= code2 << code_len;
212 //code_len += code_len2;
213 lsl code2,code2,code_len
214 orr code,code,code2
215 add code_len,code_len,code_len2
216
217 //next_in += match_length;
218 add next_in,next_in,match_length,uxtw
219
220 //write_bits(&state->bitbuf, code, code_len);
221 update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
222
223
224
// Keep looping while next_in <= loop_end_cnt.
225 cmp next_in,loop_end_cnt
226 bls main_loop_start
227 b main_loop_end
// Literal path. Note that this label has the same name as the get_lit_code
// macro invoked two lines below.
228 get_lit_code:
229 //get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
// Keep only the low byte of the 32-bit word loaded at next_in.
230 and literal,literal,0xff
231 get_lit_code hufftables,literal,code,code_len
232
233 //next_in++;
234 add next_in,next_in,1
235
236 //write_bits(&state->bitbuf, code, code_len);
237 update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
// Keep looping while next_in <= loop_end_cnt, otherwise fall through.
238 cmp next_in,loop_end_cnt
239 bls main_loop_start
240
241 main_loop_end:
242 //update state here
243
244 //load end_of_stream and flush together
// cbz tests the whole 32-bit word loaded from _end_of_stream, so the state
// field is only written when that word is non-zero.
245 ldr w_end_of_stream, [stream, _end_of_stream]
246 //(stream->end_of_stream || stream->flush != 0)
247 cbz w_end_of_stream, update_state_exit
248 mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
249 str w_tmp0, [stream, _internal_state+_state]
// Reached by fall-through from main_loop_end and directly from the
// output-full check in the loop (which leaves the state field untouched).
250 update_state_exit:
251 update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
252 exit_ret:
253 pop_stack
254 ret
// Entry path for avail_in == 0: update_state is not run, only the optional
// state transition, then the common epilogue.
255 exit_save_state:
256 ldr w_end_of_stream, [stream, _end_of_stream]
257 cbz w_end_of_stream, exit_ret //(stream->end_of_stream || stream->flush != 0)
258 mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
259 str w_tmp0, [stream, _internal_state+_state]
260 b exit_ret
261 .size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64