1 /**********************************************************************
2 Copyright(c) 2019 Arm Corporation All rights reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the distribution.
13 * Neither the name of Arm Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
32 #include "lz0a_const_aarch64.h"
33 #include "data_struct_aarch64.h"
34 #include "huffman_aarch64.h"
35 #include "bitbuf2_aarch64.h"
36 #include "stdmac_aarch64.h"
// declare_generic_reg name, reg, default
// Creates a register alias with .req: <name> becomes <default><reg>, so
// "declare_generic_reg stream, 0,x" makes stream another name for x0.
//   name    - alias to define
//   reg     - register number
//   default - width prefix (x or w) used for the plain alias
// NOTE(review): the rest of this file also uses w_<name> and x_<name>
// aliases (for example w_tmp0, x_hash). The lines that would define them,
// and the closing .endm, are not visible in this view - confirm they are
// present in the full macro.
41 .macro declare_generic_reg name:req,reg:req,default:req
42 \name .req \default\reg
// update_state stream, start_in, next_in, end_in, m_out_buf, m_out_start, tmp0, tmp1
// Writes the input/output progress held in registers back into the stream
// structure addressed by <stream>:
//   - next_in                      <- next_in register
//   - word pair at _avail_in       <- (end_in - next_in, total_in + consumed)
//   - next_out                     <- next_out + bytes_written
//   - word pair at _avail_out      <- (avail_out - bytes_written,
//                                      second word + bytes_written)
// where consumed = next_in - start_in and bytes_written = m_out_buf - m_out_start.
// All arguments are alias base names; the macro reaches the registers through
// the x_<name> / w_<name> views (and the plain alias for stream).
// Overwrites: m_out_buf, m_out_start, start_in, end_in, next_in, tmp0.
// tmp1 is accepted but not referenced by any line visible here.
// NOTE(review): the two stp stores write two adjacent 32-bit words each, so
// they assume the word following _avail_in is total_in and the word following
// _avail_out is total_out - confirm against the struct offsets header.
// NOTE(review): the closing .endm and the instructions that set tmp0 before
// the has_hist store are not visible in this view.
47 .macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
48 m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
50 //m_out_buf=bytes_written
51 sub x_\m_out_buf,x_\m_out_buf,x_\m_out_start
// Store the low byte of tmp0 into the has_hist field of the internal state.
// The value of tmp0 at this point comes from lines not visible here.
// This line addresses the stream through its x_ alias, the others through
// the plain alias.
55 strb w_\tmp0,[x_\stream,_internal_state_has_hist]
// tmp0 = stream total_in (32-bit load)
57 ldr w_\tmp0,[\stream,_total_in]
58 ldr x_\m_out_start,[\stream,_next_out] //m_out_start = next_out
// Publish the new input cursor, then turn start_in into the number of bytes
// consumed and end_in into the number of input bytes remaining.
60 str x_\next_in,[\stream,_next_in]
61 sub x_\start_in,x_\next_in,x_\start_in
62 sub x_\end_in,x_\end_in,x_\next_in
// tmp0 = total_in + consumed; store (remaining, new total) as a word pair.
63 add w_\tmp0,w_\tmp0,w_\start_in
64 stp w_\end_in,w_\tmp0,[\stream,_avail_in]
65 //next_in=avail_out,start_in=total_out
66 ldp w_\next_in,w_\start_in,[\stream,_avail_out]
// Advance next_out by the bytes written and store it back.
67 add x_\m_out_start,x_\m_out_start,x_\m_out_buf
68 str x_\m_out_start,[\stream,_next_out]
// Second word grows by bytes_written, first word (avail_out) shrinks by it;
// both are stored back as a pair.
69 add w_\start_in,w_\start_in,w_\m_out_buf
70 sub w_\next_in,w_\next_in,w_\m_out_buf
71 stp w_\next_in,w_\start_in,[\stream,_avail_out]
// Symbol declaration and register map for isal_deflate_body_aarch64.
// The single argument arrives in x0 (aliased as stream below).
// Points worth knowing when reading the body:
//   - hash (w24) and code_len (x24) name the same register.
//   - end_of_stream (w10) and code2 (x10) name the same register.
//   - tmp0..tmp2 are declared as w5..w7; the body also uses their x_ views.
// NOTE(review): registers 19-28 are callee-saved under AAPCS64; the code that
// saves and restores them is not visible in this view - confirm it exists.
// NOTE(review): m_bit_count is mapped to register 18, which AAPCS64 treats as
// a platform register (reserved on some operating systems) - confirm this is
// acceptable for every target platform.
75 .global isal_deflate_body_aarch64
76 .type isal_deflate_body_aarch64, %function
78 void isal_deflate_body_aarch64(struct isal_zstream *stream)
// Stream pointer and values loaded once at entry.
80 declare_generic_reg stream, 0,x //struct isal_zstream *stream
81 declare_generic_reg state, 8,x //&stream->state
82 declare_generic_reg avail_in, 9,w
83 declare_generic_reg end_of_stream, 10,w //can be used in loop
85 declare_generic_reg hash_mask, 11,w
86 declare_generic_reg match_length, 12,w
87 declare_generic_reg hufftables, 13,x
// Output bit buffer state: write cursor, buffer start, buffer end,
// pending bits and pending bit count.
89 declare_generic_reg m_out_buf, 14,x
90 declare_generic_reg m_out_start, 15,x
91 declare_generic_reg m_out_end, 16,x
92 declare_generic_reg m_bits, 17,x
93 declare_generic_reg m_bit_count, 18,w
// Input window: first byte, one past the last byte, read cursor, and the
// cursor limit compared against next_in in the body.
95 declare_generic_reg start_in, 19,x
96 declare_generic_reg end_in, 20,x
97 declare_generic_reg next_in, 21,x
98 declare_generic_reg loop_end_cnt, 22,x
// Per-iteration values.
100 declare_generic_reg literal, 23,w
101 declare_generic_reg hash, 24,w
102 declare_generic_reg dist, 25,w
// Hash table base, reference address for positions, and the dist_mask value.
104 declare_generic_reg last_seen, 26,x
105 declare_generic_reg file_start, 27,x
106 declare_generic_reg hist_size, 28,w
// Scratch registers.
108 declare_generic_reg tmp0, 5 ,w
109 declare_generic_reg tmp1, 6 ,w
110 declare_generic_reg tmp2, 7 ,w
// Huffman code words and their lengths (code_len and code2 overlap with
// hash and end_of_stream respectively, see the note at the top).
112 declare_generic_reg code, 3,x
113 declare_generic_reg code_len, 24,x
114 declare_generic_reg code2, 10,x
115 declare_generic_reg code_len2, 4,x
// isal_deflate_body_aarch64(stream)
// Compresses input from the stream: hashes 4 bytes at the read cursor with
// CRC32C, looks up and updates a 16-bit position table, and emits either a
// length/distance code pair or a literal code through the bit buffer.
// In: x0 = stream.
// NOTE(review): this view is missing lines. The labels exit_save_state,
// update_state_exit and exit_ret are referenced but not visible, as are the
// loop labels, the branches after most cmp instructions, any stack
// save/restore and the ret. The comments below describe only what each
// visible instruction does.
118 isal_deflate_body_aarch64:
// Nothing to do when there is no input: go to exit_save_state.
121 ldr avail_in, [stream, _avail_in]
122 cbz avail_in, exit_save_state
124 // set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
// m_out_end = next_out + avail_out - 8; m_out_start = m_out_buf = next_out.
// The three pointers are then stored into the bit buffer state.
125 ldr w_m_out_end,[stream,_avail_out]
126 ldr m_out_buf,[stream,_next_out]
127 add m_out_end,m_out_buf,w_m_out_end,uxtw
128 sub m_out_end,m_out_end , 8
129 mov m_out_start,m_out_buf
130 stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
131 str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
// Resume with the pending bit count and pending bits saved in the state.
132 ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
133 ldr m_bits ,[stream,_internal_state_bitbuf_m_bits]
137 //last_seen=&stream.internal_state.head = _internal_state+_head
// The offset is added in two steps (65536, then the remainder); presumably
// it does not fit a single add immediate - TODO confirm.
138 add last_seen,stream,65536
139 add last_seen,last_seen,_internal_state+_head -65536
142 //start_in=stream->next_in;next_in=start_in
// NOTE(review): only the load of start_in is visible; the copy into next_in
// is expected on a line missing from this view.
143 ldr start_in,[stream,_next_in]
145 add end_in,start_in,avail_in,uxtw //avail_in reg is free now
// Cursor limit = end_in - 289. The branch consuming the flags of the
// following compare is not visible in this view.
146 sub loop_end_cnt,end_in,289 //loop end
147 cmp next_in,loop_end_cnt
150 //file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
151 ldr w_file_start,[stream,_total_in]
152 sub file_start,next_in,file_start,uxtw
154 //uint32_t hist_size = state->dist_mask;
155 ldr hist_size,[stream,_internal_state + _dist_mask]
157 //uint32_t hash_mask = state->hash_mask;
158 ldr hash_mask,[stream,_internal_state + _hash_mask]
160 ldr hufftables,[stream,_hufftables]
164 //is_full(&state->bitbuf)
// Unsigned compare: leave via update_state_exit once the write cursor has
// passed m_out_end.
165 cmp m_out_buf,m_out_end
166 bhi update_state_exit
// Load 32 bits at the read cursor and hash them:
// hash = crc32c(0, literal) & hash_mask.
168 ldr literal,[next_in]
169 crc32cw hash,wzr,literal
170 and hash,hash,hash_mask
172 ///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
// tmp0 = previous 16-bit position for this hash (index scaled by 2).
// x_dist = current position relative to file_start, whose low 16 bits
// replace the table entry. The subtraction of tmp0 and the masking described
// by the comment above are not visible in this view.
173 ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash]
174 sub x_dist,next_in,file_start
175 //last_seen[hash] = (uint64_t) (next_in - file_start);
176 strh dist,[last_seen,x_hash,lsl 1]
184 ///match_length = compare258(next_in - dist, next_in, 258);
// x_tmp2 = candidate match address; compare_258_bytes is a macro defined
// outside this file view.
185 sub x_tmp2,next_in,x_dist
186 compare_258_bytes tmp2,next_in,match_length,tmp0,tmp1
// Insert table entries for the 4-byte windows at next_in+1 and next_in+2.
// NOTE(review): both visible stores write tmp0 as computed here; any
// adjustment of tmp0 between them would be on lines missing from this view.
190 sub x_tmp0,next_in,file_start
191 ldr literal,[next_in,1]
192 crc32cw hash,wzr,literal
193 and hash,hash,hash_mask
195 strh tmp0,[last_seen,x_hash,lsl 1]
196 //call_print_b hash,dist,last_seen
198 ldr literal,[next_in,2]
199 crc32cw hash,wzr,literal
200 and hash,hash,hash_mask
202 strh tmp0,[last_seen,x_hash,lsl 1]
204 //get_len_code(stream->hufftables, match_length, &code,
// get_len_code and get_dist_code are macros defined outside this file view.
// code_len shares register 24 with hash, and code2 shares register 10 with
// end_of_stream, so hash and end_of_stream are no longer valid after these.
206 get_len_code hufftables,match_length,code,code_len,tmp0
208 //get_dist_code(stream->hufftables, dist, &code2, &code_len2);
209 get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2
211 //code |= code2 << code_len;
212 //code_len += code_len2;
// Only the shift and the length sum are visible; the OR into code is
// expected on a line missing from this view.
213 lsl code2,code2,code_len
215 add code_len,code_len,code_len2
217 //next_in += match_length;
218 add next_in,next_in,match_length,uxtw
220 //write_bits(&state->bitbuf, code, code_len);
// update_bits is a macro defined outside this file view.
221 update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
// Compare the cursor against the limit; the branch is not visible here.
225 cmp next_in,loop_end_cnt
229 //get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
// Literal path: keep the low byte of the loaded word, look up its code,
// advance the cursor by one byte and emit the code.
230 and literal,literal,0xff
231 get_lit_code hufftables,literal,code,code_len
234 add next_in,next_in,1
236 //write_bits(&state->bitbuf, code, code_len);
237 update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
238 cmp next_in,loop_end_cnt
244 //load end_of_stream and flush together
// NOTE(review): this is a 32-bit load at _end_of_stream; whether that word
// also covers the flush field depends on the struct layout - confirm.
245 ldr w_end_of_stream, [stream, _end_of_stream]
246 //(stream->end_of_stream || stream->flush != 0)
// If the loaded word is zero go straight to update_state_exit; otherwise
// record ZSTATE_FLUSH_READ_BUFFER as the next state first.
247 cbz w_end_of_stream, update_state_exit
248 mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
249 str w_tmp0, [stream, _internal_state+_state]
// Write cursors and counters back to the stream (see the update_state macro).
251 update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
// Same end_of_stream check as above, branching to exit_ret when the word is
// zero. Presumably this is the exit_save_state path taken when avail_in is
// zero - the label itself is not visible in this view.
256 ldr w_end_of_stream, [stream, _end_of_stream]
257 cbz w_end_of_stream, exit_ret //(stream->end_of_stream || stream->flush != 0)
258 mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
259 str w_tmp0, [stream, _internal_state+_state]
// Record the symbol size for the function.
261 .size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64