]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /********************************************************************** |
2 | Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ||
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | * Redistributions of source code must retain the above copyright | |
8 | notice, this list of conditions and the following disclaimer. | |
9 | * Redistributions in binary form must reproduce the above copyright | |
10 | notice, this list of conditions and the following disclaimer in | |
11 | the documentation and/or other materials provided with the | |
12 | distribution. | |
13 | * Neither the name of Intel Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | **********************************************************************/ | |
29 | ||
30 | #include <stdint.h> | |
31 | #include <stdlib.h> | |
32 | #include <assert.h> | |
33 | #include "igzip_lib.h" | |
f91f0fd5 | 34 | #include "unaligned.h" |
7c673cae | 35 | |
f91f0fd5 | 36 | #if __x86_64__ || __i386__ || _M_X64 || _M_IX86 |
7c673cae FG |
37 | #ifdef _MSC_VER |
38 | # include <intrin.h> | |
39 | # define inline __inline | |
40 | #else | |
41 | # include <x86intrin.h> | |
42 | #endif | |
f91f0fd5 TL |
43 | #else |
44 | # define inline __inline | |
45 | #endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86 | |
7c673cae | 46 | |
f91f0fd5 TL |
47 | /** |
48 | * @brief Calculate the bit offset of the msb. | |
49 | * @param val 32-bit unsigned integer input | |
50 | * | |
51 | * @returns bit offset of msb starting at 1 for first bit | |
52 | */ | |
7c673cae FG |
53 | static inline uint32_t bsr(uint32_t val) |
54 | { | |
55 | uint32_t msb; | |
f91f0fd5 TL |
56 | #if defined(_MSC_VER) |
57 | unsigned long ret = 0; | |
58 | if (val != 0) { | |
59 | _BitScanReverse(&ret, val); | |
60 | msb = ret + 1; | |
61 | } | |
62 | else | |
63 | msb = 0; | |
64 | #elif defined( __LZCNT__) | |
65 | msb = 32 - __lzcnt32(val); | |
66 | #elif defined(__x86_64__) || defined(__aarch64__) | |
67 | msb = (val == 0)? 0 : 32 - __builtin_clz(val); | |
7c673cae FG |
68 | #else |
69 | for(msb = 0; val > 0; val >>= 1) | |
70 | msb++; | |
71 | #endif | |
72 | return msb; | |
73 | } | |
74 | ||
f91f0fd5 | 75 | static inline uint32_t tzbytecnt(uint64_t val) |
7c673cae FG |
76 | { |
77 | uint32_t cnt; | |
78 | ||
f91f0fd5 TL |
79 | #ifdef __BMI__ |
80 | cnt = __tzcnt_u64(val); | |
81 | cnt = cnt / 8; | |
82 | #elif defined(__x86_64__) || defined(__aarch64__) | |
7c673cae | 83 | |
f91f0fd5 TL |
84 | cnt = (val == 0)? 64 : __builtin_ctzll(val); |
85 | cnt = cnt / 8; | |
7c673cae FG |
86 | |
87 | #else | |
88 | for(cnt = 8; val > 0; val <<= 8) | |
89 | cnt -= 1; | |
90 | #endif | |
91 | return cnt; | |
92 | } | |
93 | ||
94 | static void compute_dist_code(struct isal_hufftables *hufftables, uint16_t dist, uint64_t *p_code, uint64_t *p_len) | |
95 | { | |
224ce89b | 96 | assert(dist > IGZIP_DIST_TABLE_SIZE); |
7c673cae FG |
97 | |
98 | dist -= 1; | |
99 | uint32_t msb; | |
100 | uint32_t num_extra_bits; | |
101 | uint32_t extra_bits; | |
102 | uint32_t sym; | |
103 | uint32_t len; | |
104 | uint32_t code; | |
105 | ||
106 | msb = bsr(dist); | |
107 | assert(msb >= 1); | |
108 | num_extra_bits = msb - 2; | |
109 | extra_bits = dist & ((1 << num_extra_bits) - 1); | |
110 | dist >>= num_extra_bits; | |
111 | sym = dist + 2 * num_extra_bits; | |
112 | assert(sym < 30); | |
224ce89b WB |
113 | code = hufftables->dcodes[sym - IGZIP_DECODE_OFFSET]; |
114 | len = hufftables->dcodes_sizes[sym - IGZIP_DECODE_OFFSET]; | |
7c673cae FG |
115 | *p_code = code | (extra_bits << len); |
116 | *p_len = len + num_extra_bits; | |
117 | } | |
118 | ||
119 | static inline void get_dist_code(struct isal_hufftables *hufftables, uint32_t dist, uint64_t *code, uint64_t *len) | |
120 | { | |
121 | if (dist < 1) | |
122 | dist = 0; | |
123 | assert(dist >= 1); | |
124 | assert(dist <= 32768); | |
224ce89b | 125 | if (dist <= IGZIP_DIST_TABLE_SIZE) { |
7c673cae FG |
126 | uint64_t code_len; |
127 | code_len = hufftables->dist_table[dist - 1]; | |
128 | *code = code_len >> 5; | |
129 | *len = code_len & 0x1F; | |
130 | } else { | |
131 | compute_dist_code(hufftables, dist, code, len); | |
132 | } | |
133 | } | |
134 | ||
135 | static inline void get_len_code(struct isal_hufftables *hufftables, uint32_t length, uint64_t *code, uint64_t *len) | |
136 | { | |
137 | assert(length >= 3); | |
138 | assert(length <= 258); | |
139 | ||
140 | uint64_t code_len; | |
141 | code_len = hufftables->len_table[length - 3]; | |
142 | *code = code_len >> 5; | |
143 | *len = code_len & 0x1F; | |
144 | } | |
145 | ||
146 | static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit, uint64_t *code, uint64_t *len) | |
147 | { | |
148 | assert(lit <= 256); | |
149 | ||
150 | *code = hufftables->lit_table[lit]; | |
151 | *len = hufftables->lit_table_sizes[lit]; | |
152 | } | |
153 | ||
224ce89b WB |
154 | static void compute_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits) |
155 | { | |
156 | uint32_t msb; | |
157 | uint32_t num_extra_bits; | |
158 | ||
159 | dist -= 1; | |
160 | msb = bsr(dist); | |
161 | assert(msb >= 1); | |
162 | num_extra_bits = msb - 2; | |
163 | *extra_bits = dist & ((1 << num_extra_bits) - 1); | |
164 | dist >>= num_extra_bits; | |
165 | *code = dist + 2 * num_extra_bits; | |
166 | assert(*code < 30); | |
167 | } | |
168 | ||
169 | static inline void get_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits) | |
170 | { | |
171 | assert(dist >= 1); | |
172 | assert(dist <= 32768); | |
173 | if (dist <= 2) { | |
174 | *code = dist - 1; | |
175 | *extra_bits = 0; | |
176 | } else { | |
177 | compute_dist_icf_code(dist, code, extra_bits); | |
178 | } | |
179 | } | |
180 | ||
181 | static inline void get_len_icf_code(uint32_t length, uint32_t *code) | |
182 | { | |
183 | assert(length >= 3); | |
184 | assert(length <= 258); | |
185 | ||
186 | *code = length + 254; | |
187 | } | |
188 | ||
189 | static inline void get_lit_icf_code(uint32_t lit, uint32_t *code) | |
190 | { | |
191 | assert(lit <= 256); | |
192 | ||
193 | *code = lit; | |
194 | } | |
195 | ||
7c673cae FG |
196 | /** |
197 | * @brief Returns a hash of the first 3 bytes of input data. | |
198 | */ | |
199 | static inline uint32_t compute_hash(uint32_t data) | |
200 | { | |
224ce89b | 201 | #ifdef __SSE4_2__ |
7c673cae FG |
202 | |
203 | return _mm_crc32_u32(0, data); | |
204 | ||
205 | #else | |
f91f0fd5 | 206 | uint64_t hash; |
7c673cae | 207 | /* Use multiplication to create a hash, 0xBDD06057 is a prime number */ |
f91f0fd5 TL |
208 | hash = data; |
209 | hash *= 0xB2D06057; | |
210 | hash >>= 16; | |
211 | hash *= 0xB2D06057; | |
212 | hash >>= 16; | |
213 | ||
214 | return hash; | |
7c673cae | 215 | |
224ce89b | 216 | #endif /* __SSE4_2__ */ |
7c673cae FG |
217 | } |
218 | ||
f91f0fd5 TL |
219 | #define PROD1 0xFFFFE84B |
220 | #define PROD2 0xFFFF97B1 | |
221 | static inline uint32_t compute_hash_mad(uint32_t data) | |
222 | { | |
223 | int16_t data_low; | |
224 | int16_t data_high; | |
225 | ||
226 | data_low = data; | |
227 | data_high = data >> 16; | |
228 | data = PROD1 * data_low + PROD2 * data_high; | |
229 | ||
230 | data_low = data; | |
231 | data_high = data >> 16; | |
232 | data = PROD1 * data_low + PROD2 * data_high; | |
233 | ||
234 | return data; | |
235 | } | |
236 | ||
237 | static inline uint32_t compute_long_hash(uint64_t data) { | |
238 | ||
239 | return compute_hash(data >> 32)^compute_hash(data); | |
240 | } | |
7c673cae FG |
241 | |
242 | /** | |
243 | * @brief Returns how long str1 and str2 have the same symbols. | |
244 | * @param str1: First input string. | |
245 | * @param str2: Second input string. | |
246 | * @param max_length: length of the smaller string. | |
247 | */ | |
248 | static inline int compare258(uint8_t * str1, uint8_t * str2, uint32_t max_length) | |
249 | { | |
250 | uint32_t count; | |
251 | uint64_t test; | |
252 | uint64_t loop_length; | |
253 | ||
254 | if(max_length > 258) | |
255 | max_length = 258; | |
256 | ||
257 | loop_length = max_length & ~0x7; | |
258 | ||
259 | for(count = 0; count < loop_length; count += 8){ | |
f91f0fd5 TL |
260 | test = load_u64(str1); |
261 | test ^= load_u64(str2); | |
262 | if(test != 0) | |
263 | return count + tzbytecnt(test); | |
264 | str1 += 8; | |
265 | str2 += 8; | |
266 | } | |
267 | ||
268 | switch(max_length % 8){ | |
269 | ||
270 | case 7: | |
271 | if(*str1++ != *str2++) | |
272 | return count; | |
273 | count++; | |
274 | case 6: | |
275 | if(*str1++ != *str2++) | |
276 | return count; | |
277 | count++; | |
278 | case 5: | |
279 | if(*str1++ != *str2++) | |
280 | return count; | |
281 | count++; | |
282 | case 4: | |
283 | if(*str1++ != *str2++) | |
284 | return count; | |
285 | count++; | |
286 | case 3: | |
287 | if(*str1++ != *str2++) | |
288 | return count; | |
289 | count++; | |
290 | case 2: | |
291 | if(*str1++ != *str2++) | |
292 | return count; | |
293 | count++; | |
294 | case 1: | |
295 | if(*str1 != *str2) | |
296 | return count; | |
297 | count++; | |
298 | } | |
299 | ||
300 | return count; | |
301 | } | |
302 | ||
303 | /** | |
304 | * @brief Returns how long str1 and str2 have the same symbols. | |
305 | * @param str1: First input string. | |
306 | * @param str2: Second input string. | |
307 | * @param max_length: length of the smaller string. | |
308 | */ | |
309 | static inline int compare(uint8_t * str1, uint8_t * str2, uint32_t max_length) | |
310 | { | |
311 | uint32_t count; | |
312 | uint64_t test; | |
313 | uint64_t loop_length; | |
314 | ||
315 | loop_length = max_length & ~0x7; | |
316 | ||
317 | for(count = 0; count < loop_length; count += 8){ | |
318 | test = load_u64(str1); | |
319 | test ^= load_u64(str2); | |
7c673cae | 320 | if(test != 0) |
f91f0fd5 | 321 | return count + tzbytecnt(test); |
7c673cae FG |
322 | str1 += 8; |
323 | str2 += 8; | |
324 | } | |
325 | ||
326 | switch(max_length % 8){ | |
327 | ||
328 | case 7: | |
329 | if(*str1++ != *str2++) | |
330 | return count; | |
331 | count++; | |
332 | case 6: | |
333 | if(*str1++ != *str2++) | |
334 | return count; | |
335 | count++; | |
336 | case 5: | |
337 | if(*str1++ != *str2++) | |
338 | return count; | |
339 | count++; | |
340 | case 4: | |
341 | if(*str1++ != *str2++) | |
342 | return count; | |
343 | count++; | |
344 | case 3: | |
345 | if(*str1++ != *str2++) | |
346 | return count; | |
347 | count++; | |
348 | case 2: | |
349 | if(*str1++ != *str2++) | |
350 | return count; | |
351 | count++; | |
352 | case 1: | |
353 | if(*str1 != *str2) | |
354 | return count; | |
355 | count++; | |
356 | } | |
357 | ||
358 | return count; | |
359 | } |