]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* ****************************************************************** |
2 | Huffman encoder, part of New Generation Entropy library | |
3 | Copyright (C) 2013-2016, Yann Collet. | |
4 | ||
5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
6 | ||
7 | Redistribution and use in source and binary forms, with or without | |
8 | modification, are permitted provided that the following conditions are | |
9 | met: | |
10 | ||
11 | * Redistributions of source code must retain the above copyright | |
12 | notice, this list of conditions and the following disclaimer. | |
13 | * Redistributions in binary form must reproduce the above | |
14 | copyright notice, this list of conditions and the following disclaimer | |
15 | in the documentation and/or other materials provided with the | |
16 | distribution. | |
17 | ||
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | ||
30 | You can contact the author at : | |
31 | - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy | |
32 | - Public forum : https://groups.google.com/forum/#!forum/lz4c | |
33 | ****************************************************************** */ | |
34 | ||
35 | /* ************************************************************** | |
36 | * Compiler specifics | |
37 | ****************************************************************/ | |
38 | #ifdef _MSC_VER /* Visual Studio */ | |
39 | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ | |
40 | #endif | |
41 | ||
42 | ||
43 | /* ************************************************************** | |
44 | * Includes | |
45 | ****************************************************************/ | |
46 | #include <string.h> /* memcpy, memset */ | |
47 | #include <stdio.h> /* printf (debug) */ | |
9f95a23c | 48 | #include "compiler.h" |
7c673cae | 49 | #include "bitstream.h" |
9f95a23c | 50 | #include "hist.h" |
7c673cae FG |
51 | #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ |
52 | #include "fse.h" /* header compression */ | |
53 | #define HUF_STATIC_LINKING_ONLY | |
54 | #include "huf.h" | |
11fdf7f2 | 55 | #include "error_private.h" |
7c673cae FG |
56 | |
57 | ||
58 | /* ************************************************************** | |
59 | * Error Management | |
60 | ****************************************************************/ | |
11fdf7f2 | 61 | #define HUF_isError ERR_isError |
9f95a23c | 62 | #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ |
11fdf7f2 | 63 | #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e |
7c673cae FG |
64 | #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } |
65 | ||
66 | ||
67 | /* ************************************************************** | |
68 | * Utils | |
69 | ****************************************************************/ | |
70 | unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) | |
71 | { | |
72 | return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); | |
73 | } | |
74 | ||
75 | ||
76 | /* ******************************************************* | |
77 | * HUF : Huffman block compression | |
78 | *********************************************************/ | |
79 | /* HUF_compressWeights() : | |
80 | * Same as FSE_compress(), but dedicated to huff0's weights compression. | |
81 | * The use case needs much less stack memory. | |
82 | * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. | |
83 | */ | |
84 | #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 | |
9f95a23c | 85 | static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) |
7c673cae FG |
86 | { |
87 | BYTE* const ostart = (BYTE*) dst; | |
88 | BYTE* op = ostart; | |
89 | BYTE* const oend = ostart + dstSize; | |
90 | ||
9f95a23c | 91 | unsigned maxSymbolValue = HUF_TABLELOG_MAX; |
7c673cae FG |
92 | U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; |
93 | ||
94 | FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; | |
95 | BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; | |
96 | ||
9f95a23c | 97 | unsigned count[HUF_TABLELOG_MAX+1]; |
7c673cae FG |
98 | S16 norm[HUF_TABLELOG_MAX+1]; |
99 | ||
100 | /* init conditions */ | |
101 | if (wtSize <= 1) return 0; /* Not compressible */ | |
102 | ||
103 | /* Scan input and build symbol stats */ | |
9f95a23c | 104 | { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ |
7c673cae | 105 | if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ |
9f95a23c | 106 | if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ |
7c673cae FG |
107 | } |
108 | ||
109 | tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); | |
110 | CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); | |
111 | ||
112 | /* Write table description header */ | |
113 | { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); | |
114 | op += hSize; | |
115 | } | |
116 | ||
117 | /* Compress */ | |
118 | CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); | |
119 | { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); | |
120 | if (cSize == 0) return 0; /* not enough space for compressed data */ | |
121 | op += cSize; | |
122 | } | |
123 | ||
124 | return op-ostart; | |
125 | } | |
126 | ||
127 | ||
128 | struct HUF_CElt_s { | |
129 | U16 val; | |
130 | BYTE nbBits; | |
131 | }; /* typedef'd to HUF_CElt within "huf.h" */ | |
132 | ||
133 | /*! HUF_writeCTable() : | |
11fdf7f2 | 134 | `CTable` : Huffman tree to save, using huf representation. |
7c673cae FG |
135 | @return : size of saved CTable */ |
136 | size_t HUF_writeCTable (void* dst, size_t maxDstSize, | |
9f95a23c | 137 | const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) |
7c673cae FG |
138 | { |
139 | BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ | |
140 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; | |
141 | BYTE* op = (BYTE*)dst; | |
142 | U32 n; | |
143 | ||
144 | /* check conditions */ | |
145 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); | |
146 | ||
147 | /* convert to weight */ | |
148 | bitsToWeight[0] = 0; | |
149 | for (n=1; n<huffLog+1; n++) | |
150 | bitsToWeight[n] = (BYTE)(huffLog + 1 - n); | |
151 | for (n=0; n<maxSymbolValue; n++) | |
152 | huffWeight[n] = bitsToWeight[CTable[n].nbBits]; | |
153 | ||
154 | /* attempt weights compression by FSE */ | |
155 | { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); | |
156 | if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ | |
157 | op[0] = (BYTE)hSize; | |
158 | return hSize+1; | |
159 | } } | |
160 | ||
161 | /* write raw values as 4-bits (max : 15) */ | |
162 | if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ | |
163 | if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ | |
164 | op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); | |
165 | huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ | |
166 | for (n=0; n<maxSymbolValue; n+=2) | |
167 | op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); | |
168 | return ((maxSymbolValue+1)/2) + 1; | |
169 | } | |
170 | ||
171 | ||
9f95a23c | 172 | size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) |
7c673cae FG |
173 | { |
174 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ | |
175 | U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ | |
176 | U32 tableLog = 0; | |
177 | U32 nbSymbols = 0; | |
178 | ||
179 | /* get symbol weights */ | |
180 | CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); | |
181 | ||
182 | /* check result */ | |
183 | if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); | |
11fdf7f2 | 184 | if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); |
7c673cae FG |
185 | |
186 | /* Prepare base value per rank */ | |
187 | { U32 n, nextRankStart = 0; | |
188 | for (n=1; n<=tableLog; n++) { | |
189 | U32 current = nextRankStart; | |
190 | nextRankStart += (rankVal[n] << (n-1)); | |
191 | rankVal[n] = current; | |
192 | } } | |
193 | ||
194 | /* fill nbBits */ | |
195 | { U32 n; for (n=0; n<nbSymbols; n++) { | |
196 | const U32 w = huffWeight[n]; | |
197 | CTable[n].nbBits = (BYTE)(tableLog + 1 - w); | |
198 | } } | |
199 | ||
200 | /* fill val */ | |
201 | { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ | |
202 | U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; | |
203 | { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } | |
204 | /* determine stating value per rank */ | |
205 | valPerRank[tableLog+1] = 0; /* for w==0 */ | |
206 | { U16 min = 0; | |
207 | U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ | |
208 | valPerRank[n] = min; /* get starting value within each rank */ | |
209 | min += nbPerRank[n]; | |
210 | min >>= 1; | |
211 | } } | |
212 | /* assign value within rank, symbol order */ | |
11fdf7f2 | 213 | { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } |
7c673cae FG |
214 | } |
215 | ||
11fdf7f2 | 216 | *maxSymbolValuePtr = nbSymbols - 1; |
7c673cae FG |
217 | return readSize; |
218 | } | |
219 | ||
9f95a23c TL |
220 | U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue) |
221 | { | |
222 | const HUF_CElt* table = (const HUF_CElt*)symbolTable; | |
223 | assert(symbolValue <= HUF_SYMBOLVALUE_MAX); | |
224 | return table[symbolValue].nbBits; | |
225 | } | |
226 | ||
7c673cae FG |
227 | |
228 | typedef struct nodeElt_s { | |
229 | U32 count; | |
230 | U16 parent; | |
231 | BYTE byte; | |
232 | BYTE nbBits; | |
233 | } nodeElt; | |
234 | ||
235 | static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) | |
236 | { | |
237 | const U32 largestBits = huffNode[lastNonNull].nbBits; | |
238 | if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ | |
239 | ||
240 | /* there are several too large elements (at least >= 2) */ | |
241 | { int totalCost = 0; | |
242 | const U32 baseCost = 1 << (largestBits - maxNbBits); | |
243 | U32 n = lastNonNull; | |
244 | ||
245 | while (huffNode[n].nbBits > maxNbBits) { | |
246 | totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); | |
247 | huffNode[n].nbBits = (BYTE)maxNbBits; | |
248 | n --; | |
249 | } /* n stops at huffNode[n].nbBits <= maxNbBits */ | |
250 | while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ | |
251 | ||
252 | /* renorm totalCost */ | |
253 | totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ | |
254 | ||
255 | /* repay normalized cost */ | |
256 | { U32 const noSymbol = 0xF0F0F0F0; | |
257 | U32 rankLast[HUF_TABLELOG_MAX+2]; | |
258 | int pos; | |
259 | ||
260 | /* Get pos of last (smallest) symbol per rank */ | |
261 | memset(rankLast, 0xF0, sizeof(rankLast)); | |
262 | { U32 currentNbBits = maxNbBits; | |
263 | for (pos=n ; pos >= 0; pos--) { | |
264 | if (huffNode[pos].nbBits >= currentNbBits) continue; | |
265 | currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ | |
266 | rankLast[maxNbBits-currentNbBits] = pos; | |
267 | } } | |
268 | ||
269 | while (totalCost > 0) { | |
270 | U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; | |
271 | for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { | |
272 | U32 highPos = rankLast[nBitsToDecrease]; | |
273 | U32 lowPos = rankLast[nBitsToDecrease-1]; | |
274 | if (highPos == noSymbol) continue; | |
275 | if (lowPos == noSymbol) break; | |
276 | { U32 const highTotal = huffNode[highPos].count; | |
277 | U32 const lowTotal = 2 * huffNode[lowPos].count; | |
278 | if (highTotal <= lowTotal) break; | |
279 | } } | |
280 | /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ | |
11fdf7f2 TL |
281 | /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ |
282 | while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) | |
7c673cae FG |
283 | nBitsToDecrease ++; |
284 | totalCost -= 1 << (nBitsToDecrease-1); | |
285 | if (rankLast[nBitsToDecrease-1] == noSymbol) | |
286 | rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ | |
287 | huffNode[rankLast[nBitsToDecrease]].nbBits ++; | |
288 | if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ | |
289 | rankLast[nBitsToDecrease] = noSymbol; | |
290 | else { | |
291 | rankLast[nBitsToDecrease]--; | |
292 | if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) | |
293 | rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ | |
294 | } } /* while (totalCost > 0) */ | |
295 | ||
296 | while (totalCost < 0) { /* Sometimes, cost correction overshoot */ | |
297 | if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ | |
298 | while (huffNode[n].nbBits == maxNbBits) n--; | |
299 | huffNode[n+1].nbBits--; | |
300 | rankLast[1] = n+1; | |
301 | totalCost++; | |
302 | continue; | |
303 | } | |
304 | huffNode[ rankLast[1] + 1 ].nbBits--; | |
305 | rankLast[1]++; | |
306 | totalCost ++; | |
307 | } } } /* there are several too large elements (at least >= 2) */ | |
308 | ||
309 | return maxNbBits; | |
310 | } | |
311 | ||
312 | ||
313 | typedef struct { | |
314 | U32 base; | |
315 | U32 current; | |
316 | } rankPos; | |
317 | ||
9f95a23c | 318 | static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue) |
7c673cae FG |
319 | { |
320 | rankPos rank[32]; | |
321 | U32 n; | |
322 | ||
323 | memset(rank, 0, sizeof(rank)); | |
324 | for (n=0; n<=maxSymbolValue; n++) { | |
325 | U32 r = BIT_highbit32(count[n] + 1); | |
326 | rank[r].base ++; | |
327 | } | |
328 | for (n=30; n>0; n--) rank[n-1].base += rank[n].base; | |
329 | for (n=0; n<32; n++) rank[n].current = rank[n].base; | |
330 | for (n=0; n<=maxSymbolValue; n++) { | |
331 | U32 const c = count[n]; | |
332 | U32 const r = BIT_highbit32(c+1) + 1; | |
333 | U32 pos = rank[r].current++; | |
9f95a23c TL |
334 | while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { |
335 | huffNode[pos] = huffNode[pos-1]; | |
336 | pos--; | |
337 | } | |
7c673cae FG |
338 | huffNode[pos].count = c; |
339 | huffNode[pos].byte = (BYTE)n; | |
340 | } | |
341 | } | |
342 | ||
343 | ||
344 | /** HUF_buildCTable_wksp() : | |
345 | * Same as HUF_buildCTable(), but using externally allocated scratch buffer. | |
9f95a23c | 346 | * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. |
7c673cae FG |
347 | */ |
348 | #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) | |
9f95a23c TL |
349 | typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; |
350 | size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) | |
7c673cae FG |
351 | { |
352 | nodeElt* const huffNode0 = (nodeElt*)workSpace; | |
353 | nodeElt* const huffNode = huffNode0+1; | |
354 | U32 n, nonNullRank; | |
355 | int lowS, lowN; | |
356 | U16 nodeNb = STARTNODE; | |
357 | U32 nodeRoot; | |
358 | ||
359 | /* safety checks */ | |
9f95a23c TL |
360 | if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ |
361 | if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); | |
7c673cae | 362 | if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; |
9f95a23c | 363 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
7c673cae FG |
364 | memset(huffNode0, 0, sizeof(huffNodeTable)); |
365 | ||
366 | /* sort, decreasing order */ | |
367 | HUF_sort(huffNode, count, maxSymbolValue); | |
368 | ||
369 | /* init for parents */ | |
370 | nonNullRank = maxSymbolValue; | |
371 | while(huffNode[nonNullRank].count == 0) nonNullRank--; | |
372 | lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; | |
373 | huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; | |
374 | huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; | |
375 | nodeNb++; lowS-=2; | |
376 | for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); | |
377 | huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ | |
378 | ||
379 | /* create parents */ | |
380 | while (nodeNb <= nodeRoot) { | |
381 | U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; | |
382 | U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; | |
383 | huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; | |
384 | huffNode[n1].parent = huffNode[n2].parent = nodeNb; | |
385 | nodeNb++; | |
386 | } | |
387 | ||
388 | /* distribute weights (unlimited tree height) */ | |
389 | huffNode[nodeRoot].nbBits = 0; | |
390 | for (n=nodeRoot-1; n>=STARTNODE; n--) | |
391 | huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; | |
392 | for (n=0; n<=nonNullRank; n++) | |
393 | huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; | |
394 | ||
395 | /* enforce maxTableLog */ | |
396 | maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); | |
397 | ||
398 | /* fill result into tree (val, nbBits) */ | |
399 | { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; | |
400 | U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; | |
401 | if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ | |
402 | for (n=0; n<=nonNullRank; n++) | |
403 | nbPerRank[huffNode[n].nbBits]++; | |
404 | /* determine stating value per rank */ | |
405 | { U16 min = 0; | |
406 | for (n=maxNbBits; n>0; n--) { | |
407 | valPerRank[n] = min; /* get starting value within each rank */ | |
408 | min += nbPerRank[n]; | |
409 | min >>= 1; | |
410 | } } | |
411 | for (n=0; n<=maxSymbolValue; n++) | |
412 | tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ | |
413 | for (n=0; n<=maxSymbolValue; n++) | |
414 | tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ | |
415 | } | |
416 | ||
417 | return maxNbBits; | |
418 | } | |
419 | ||
420 | /** HUF_buildCTable() : | |
9f95a23c | 421 | * @return : maxNbBits |
7c673cae FG |
422 | * Note : count is used before tree is written, so they can safely overlap |
423 | */ | |
9f95a23c | 424 | size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) |
7c673cae FG |
425 | { |
426 | huffNodeTable nodeTable; | |
427 | return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); | |
428 | } | |
429 | ||
11fdf7f2 TL |
430 | static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) |
431 | { | |
432 | size_t nbBits = 0; | |
433 | int s; | |
434 | for (s = 0; s <= (int)maxSymbolValue; ++s) { | |
435 | nbBits += CTable[s].nbBits * count[s]; | |
436 | } | |
437 | return nbBits >> 3; | |
438 | } | |
439 | ||
440 | static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { | |
441 | int bad = 0; | |
442 | int s; | |
443 | for (s = 0; s <= (int)maxSymbolValue; ++s) { | |
444 | bad |= (count[s] != 0) & (CTable[s].nbBits == 0); | |
445 | } | |
446 | return !bad; | |
447 | } | |
448 | ||
9f95a23c TL |
449 | size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } |
450 | ||
451 | FORCE_INLINE_TEMPLATE void | |
452 | HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) | |
7c673cae FG |
453 | { |
454 | BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); | |
455 | } | |
456 | ||
11fdf7f2 | 457 | #define HUF_FLUSHBITS(s) BIT_flushBits(s) |
7c673cae FG |
458 | |
459 | #define HUF_FLUSHBITS_1(stream) \ | |
460 | if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) | |
461 | ||
462 | #define HUF_FLUSHBITS_2(stream) \ | |
463 | if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) | |
464 | ||
9f95a23c TL |
465 | FORCE_INLINE_TEMPLATE size_t |
466 | HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, | |
467 | const void* src, size_t srcSize, | |
468 | const HUF_CElt* CTable) | |
7c673cae FG |
469 | { |
470 | const BYTE* ip = (const BYTE*) src; | |
471 | BYTE* const ostart = (BYTE*)dst; | |
472 | BYTE* const oend = ostart + dstSize; | |
473 | BYTE* op = ostart; | |
474 | size_t n; | |
7c673cae FG |
475 | BIT_CStream_t bitC; |
476 | ||
477 | /* init */ | |
478 | if (dstSize < 8) return 0; /* not enough space to compress */ | |
479 | { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); | |
480 | if (HUF_isError(initErr)) return 0; } | |
481 | ||
482 | n = srcSize & ~3; /* join to mod 4 */ | |
483 | switch (srcSize & 3) | |
484 | { | |
485 | case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); | |
486 | HUF_FLUSHBITS_2(&bitC); | |
11fdf7f2 | 487 | /* fall-through */ |
7c673cae FG |
488 | case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); |
489 | HUF_FLUSHBITS_1(&bitC); | |
11fdf7f2 | 490 | /* fall-through */ |
7c673cae FG |
491 | case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); |
492 | HUF_FLUSHBITS(&bitC); | |
11fdf7f2 TL |
493 | /* fall-through */ |
494 | case 0 : /* fall-through */ | |
495 | default: break; | |
7c673cae FG |
496 | } |
497 | ||
498 | for (; n>0; n-=4) { /* note : n&3==0 at this stage */ | |
499 | HUF_encodeSymbol(&bitC, ip[n- 1], CTable); | |
500 | HUF_FLUSHBITS_1(&bitC); | |
501 | HUF_encodeSymbol(&bitC, ip[n- 2], CTable); | |
502 | HUF_FLUSHBITS_2(&bitC); | |
503 | HUF_encodeSymbol(&bitC, ip[n- 3], CTable); | |
504 | HUF_FLUSHBITS_1(&bitC); | |
505 | HUF_encodeSymbol(&bitC, ip[n- 4], CTable); | |
506 | HUF_FLUSHBITS(&bitC); | |
507 | } | |
508 | ||
509 | return BIT_closeCStream(&bitC); | |
510 | } | |
511 | ||
9f95a23c | 512 | #if DYNAMIC_BMI2 |
7c673cae | 513 | |
9f95a23c TL |
514 | static TARGET_ATTRIBUTE("bmi2") size_t |
515 | HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, | |
516 | const void* src, size_t srcSize, | |
517 | const HUF_CElt* CTable) | |
518 | { | |
519 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); | |
520 | } | |
521 | ||
522 | static size_t | |
523 | HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, | |
524 | const void* src, size_t srcSize, | |
525 | const HUF_CElt* CTable) | |
526 | { | |
527 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); | |
528 | } | |
529 | ||
530 | static size_t | |
531 | HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, | |
532 | const void* src, size_t srcSize, | |
533 | const HUF_CElt* CTable, const int bmi2) | |
534 | { | |
535 | if (bmi2) { | |
536 | return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); | |
537 | } | |
538 | return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); | |
539 | } | |
540 | ||
541 | #else | |
542 | ||
543 | static size_t | |
544 | HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, | |
545 | const void* src, size_t srcSize, | |
546 | const HUF_CElt* CTable, const int bmi2) | |
547 | { | |
548 | (void)bmi2; | |
549 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); | |
550 | } | |
551 | ||
552 | #endif | |
553 | ||
554 | size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) | |
555 | { | |
556 | return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); | |
557 | } | |
558 | ||
559 | ||
560 | static size_t | |
561 | HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, | |
562 | const void* src, size_t srcSize, | |
563 | const HUF_CElt* CTable, int bmi2) | |
7c673cae FG |
564 | { |
565 | size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ | |
566 | const BYTE* ip = (const BYTE*) src; | |
567 | const BYTE* const iend = ip + srcSize; | |
568 | BYTE* const ostart = (BYTE*) dst; | |
569 | BYTE* const oend = ostart + dstSize; | |
570 | BYTE* op = ostart; | |
571 | ||
572 | if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ | |
573 | if (srcSize < 12) return 0; /* no saving possible : too small input */ | |
574 | op += 6; /* jumpTable */ | |
575 | ||
9f95a23c | 576 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
7c673cae | 577 | if (cSize==0) return 0; |
9f95a23c | 578 | assert(cSize <= 65535); |
7c673cae FG |
579 | MEM_writeLE16(ostart, (U16)cSize); |
580 | op += cSize; | |
581 | } | |
582 | ||
583 | ip += segmentSize; | |
9f95a23c | 584 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
7c673cae | 585 | if (cSize==0) return 0; |
9f95a23c | 586 | assert(cSize <= 65535); |
7c673cae FG |
587 | MEM_writeLE16(ostart+2, (U16)cSize); |
588 | op += cSize; | |
589 | } | |
590 | ||
591 | ip += segmentSize; | |
9f95a23c | 592 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
7c673cae | 593 | if (cSize==0) return 0; |
9f95a23c | 594 | assert(cSize <= 65535); |
7c673cae FG |
595 | MEM_writeLE16(ostart+4, (U16)cSize); |
596 | op += cSize; | |
597 | } | |
598 | ||
599 | ip += segmentSize; | |
9f95a23c | 600 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); |
7c673cae FG |
601 | if (cSize==0) return 0; |
602 | op += cSize; | |
603 | } | |
604 | ||
605 | return op-ostart; | |
606 | } | |
607 | ||
9f95a23c TL |
608 | size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) |
609 | { | |
610 | return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); | |
611 | } | |
612 | ||
613 | typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; | |
7c673cae | 614 | |
11fdf7f2 TL |
615 | static size_t HUF_compressCTable_internal( |
616 | BYTE* const ostart, BYTE* op, BYTE* const oend, | |
617 | const void* src, size_t srcSize, | |
9f95a23c | 618 | HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) |
11fdf7f2 | 619 | { |
9f95a23c TL |
620 | size_t const cSize = (nbStreams==HUF_singleStream) ? |
621 | HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : | |
622 | HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); | |
11fdf7f2 TL |
623 | if (HUF_isError(cSize)) { return cSize; } |
624 | if (cSize==0) { return 0; } /* uncompressible */ | |
625 | op += cSize; | |
626 | /* check compressibility */ | |
627 | if ((size_t)(op-ostart) >= srcSize-1) { return 0; } | |
628 | return op-ostart; | |
629 | } | |
630 | ||
9f95a23c TL |
631 | typedef struct { |
632 | unsigned count[HUF_SYMBOLVALUE_MAX + 1]; | |
633 | HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; | |
634 | huffNodeTable nodeTable; | |
635 | } HUF_compress_tables_t; | |
11fdf7f2 | 636 | |
9f95a23c TL |
637 | /* HUF_compress_internal() : |
638 | * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ | |
639 | static size_t | |
640 | HUF_compress_internal (void* dst, size_t dstSize, | |
641 | const void* src, size_t srcSize, | |
642 | unsigned maxSymbolValue, unsigned huffLog, | |
643 | HUF_nbStreams_e nbStreams, | |
644 | void* workSpace, size_t wkspSize, | |
645 | HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, | |
646 | const int bmi2) | |
7c673cae | 647 | { |
9f95a23c | 648 | HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; |
7c673cae FG |
649 | BYTE* const ostart = (BYTE*)dst; |
650 | BYTE* const oend = ostart + dstSize; | |
651 | BYTE* op = ostart; | |
652 | ||
7c673cae | 653 | /* checks & inits */ |
9f95a23c TL |
654 | if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ |
655 | if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); | |
656 | if (!srcSize) return 0; /* Uncompressed */ | |
657 | if (!dstSize) return 0; /* cannot fit anything within dst budget */ | |
7c673cae FG |
658 | if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ |
659 | if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); | |
9f95a23c | 660 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
7c673cae FG |
661 | if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; |
662 | if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; | |
663 | ||
9f95a23c | 664 | /* Heuristic : If old table is valid, use it for small inputs */ |
11fdf7f2 | 665 | if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { |
9f95a23c TL |
666 | return HUF_compressCTable_internal(ostart, op, oend, |
667 | src, srcSize, | |
668 | nbStreams, oldHufTable, bmi2); | |
11fdf7f2 TL |
669 | } |
670 | ||
7c673cae | 671 | /* Scan input and build symbol stats */ |
9f95a23c | 672 | { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); |
7c673cae | 673 | if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ |
9f95a23c | 674 | if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ |
7c673cae FG |
675 | } |
676 | ||
11fdf7f2 | 677 | /* Check validity of previous table */ |
9f95a23c TL |
678 | if ( repeat |
679 | && *repeat == HUF_repeat_check | |
680 | && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { | |
11fdf7f2 TL |
681 | *repeat = HUF_repeat_none; |
682 | } | |
683 | /* Heuristic : use existing table for small inputs */ | |
684 | if (preferRepeat && repeat && *repeat != HUF_repeat_none) { | |
9f95a23c TL |
685 | return HUF_compressCTable_internal(ostart, op, oend, |
686 | src, srcSize, | |
687 | nbStreams, oldHufTable, bmi2); | |
11fdf7f2 TL |
688 | } |
689 | ||
7c673cae FG |
690 | /* Build Huffman Tree */ |
691 | huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); | |
9f95a23c TL |
692 | { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, |
693 | maxSymbolValue, huffLog, | |
694 | table->nodeTable, sizeof(table->nodeTable)); | |
695 | CHECK_F(maxBits); | |
7c673cae | 696 | huffLog = (U32)maxBits; |
9f95a23c TL |
697 | /* Zero unused symbols in CTable, so we can check it for validity */ |
698 | memset(table->CTable + (maxSymbolValue + 1), 0, | |
699 | sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); | |
7c673cae FG |
700 | } |
701 | ||
702 | /* Write table description header */ | |
9f95a23c TL |
703 | { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); |
704 | /* Check if using previous huffman table is beneficial */ | |
11fdf7f2 | 705 | if (repeat && *repeat != HUF_repeat_none) { |
9f95a23c TL |
706 | size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); |
707 | size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); | |
11fdf7f2 | 708 | if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { |
9f95a23c TL |
709 | return HUF_compressCTable_internal(ostart, op, oend, |
710 | src, srcSize, | |
711 | nbStreams, oldHufTable, bmi2); | |
712 | } } | |
713 | ||
714 | /* Use the new huffman table */ | |
11fdf7f2 | 715 | if (hSize + 12ul >= srcSize) { return 0; } |
7c673cae | 716 | op += hSize; |
11fdf7f2 | 717 | if (repeat) { *repeat = HUF_repeat_none; } |
9f95a23c TL |
718 | if (oldHufTable) |
719 | memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ | |
7c673cae | 720 | } |
9f95a23c TL |
721 | return HUF_compressCTable_internal(ostart, op, oend, |
722 | src, srcSize, | |
723 | nbStreams, table->CTable, bmi2); | |
7c673cae FG |
724 | } |
725 | ||
726 | ||
727 | size_t HUF_compress1X_wksp (void* dst, size_t dstSize, | |
728 | const void* src, size_t srcSize, | |
729 | unsigned maxSymbolValue, unsigned huffLog, | |
730 | void* workSpace, size_t wkspSize) | |
731 | { | |
9f95a23c TL |
732 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
733 | maxSymbolValue, huffLog, HUF_singleStream, | |
734 | workSpace, wkspSize, | |
735 | NULL, NULL, 0, 0 /*bmi2*/); | |
11fdf7f2 TL |
736 | } |
737 | ||
738 | size_t HUF_compress1X_repeat (void* dst, size_t dstSize, | |
739 | const void* src, size_t srcSize, | |
740 | unsigned maxSymbolValue, unsigned huffLog, | |
741 | void* workSpace, size_t wkspSize, | |
9f95a23c | 742 | HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) |
11fdf7f2 | 743 | { |
9f95a23c TL |
744 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
745 | maxSymbolValue, huffLog, HUF_singleStream, | |
746 | workSpace, wkspSize, hufTable, | |
747 | repeat, preferRepeat, bmi2); | |
7c673cae FG |
748 | } |
749 | ||
750 | size_t HUF_compress1X (void* dst, size_t dstSize, | |
751 | const void* src, size_t srcSize, | |
752 | unsigned maxSymbolValue, unsigned huffLog) | |
753 | { | |
9f95a23c | 754 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; |
7c673cae FG |
755 | return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); |
756 | } | |
757 | ||
9f95a23c TL |
758 | /* HUF_compress4X_repeat(): |
759 | * compress input using 4 streams. | |
760 | * provide workspace to generate compression tables */ | |
7c673cae FG |
761 | size_t HUF_compress4X_wksp (void* dst, size_t dstSize, |
762 | const void* src, size_t srcSize, | |
763 | unsigned maxSymbolValue, unsigned huffLog, | |
764 | void* workSpace, size_t wkspSize) | |
765 | { | |
9f95a23c TL |
766 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
767 | maxSymbolValue, huffLog, HUF_fourStreams, | |
768 | workSpace, wkspSize, | |
769 | NULL, NULL, 0, 0 /*bmi2*/); | |
11fdf7f2 TL |
770 | } |
771 | ||
9f95a23c TL |
772 | /* HUF_compress4X_repeat(): |
773 | * compress input using 4 streams. | |
774 | * re-use an existing huffman compression table */ | |
11fdf7f2 TL |
775 | size_t HUF_compress4X_repeat (void* dst, size_t dstSize, |
776 | const void* src, size_t srcSize, | |
777 | unsigned maxSymbolValue, unsigned huffLog, | |
778 | void* workSpace, size_t wkspSize, | |
9f95a23c | 779 | HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) |
11fdf7f2 | 780 | { |
9f95a23c TL |
781 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
782 | maxSymbolValue, huffLog, HUF_fourStreams, | |
783 | workSpace, wkspSize, | |
784 | hufTable, repeat, preferRepeat, bmi2); | |
7c673cae FG |
785 | } |
786 | ||
787 | size_t HUF_compress2 (void* dst, size_t dstSize, | |
788 | const void* src, size_t srcSize, | |
789 | unsigned maxSymbolValue, unsigned huffLog) | |
790 | { | |
9f95a23c | 791 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; |
7c673cae FG |
792 | return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); |
793 | } | |
794 | ||
795 | size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) | |
796 | { | |
9f95a23c | 797 | return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); |
7c673cae | 798 | } |