]> git.proxmox.com Git - ceph.git/blame - ceph/src/zstd/lib/compress/huf_compress.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / zstd / lib / compress / huf_compress.c
CommitLineData
7c673cae
FG
1/* ******************************************************************
2 Huffman encoder, part of New Generation Entropy library
3 Copyright (C) 2013-2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33****************************************************************** */
34
35/* **************************************************************
36* Compiler specifics
37****************************************************************/
38#ifdef _MSC_VER /* Visual Studio */
39# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
40#endif
41
42
43/* **************************************************************
44* Includes
45****************************************************************/
46#include <string.h> /* memcpy, memset */
47#include <stdio.h> /* printf (debug) */
9f95a23c 48#include "compiler.h"
7c673cae 49#include "bitstream.h"
9f95a23c 50#include "hist.h"
7c673cae
FG
51#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
52#include "fse.h" /* header compression */
53#define HUF_STATIC_LINKING_ONLY
54#include "huf.h"
11fdf7f2 55#include "error_private.h"
7c673cae
FG
56
57
58/* **************************************************************
59* Error Management
60****************************************************************/
11fdf7f2 61#define HUF_isError ERR_isError
9f95a23c 62#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
11fdf7f2 63#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
7c673cae
FG
64#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
65
66
67/* **************************************************************
68* Utils
69****************************************************************/
70unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
71{
72 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
73}
74
75
76/* *******************************************************
77* HUF : Huffman block compression
78*********************************************************/
79/* HUF_compressWeights() :
80 * Same as FSE_compress(), but dedicated to huff0's weights compression.
81 * The use case needs much less stack memory.
82 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
83 */
84#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
9f95a23c 85static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
7c673cae
FG
86{
87 BYTE* const ostart = (BYTE*) dst;
88 BYTE* op = ostart;
89 BYTE* const oend = ostart + dstSize;
90
9f95a23c 91 unsigned maxSymbolValue = HUF_TABLELOG_MAX;
7c673cae
FG
92 U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
93
94 FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
95 BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
96
9f95a23c 97 unsigned count[HUF_TABLELOG_MAX+1];
7c673cae
FG
98 S16 norm[HUF_TABLELOG_MAX+1];
99
100 /* init conditions */
101 if (wtSize <= 1) return 0; /* Not compressible */
102
103 /* Scan input and build symbol stats */
9f95a23c 104 { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
7c673cae 105 if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
9f95a23c 106 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
7c673cae
FG
107 }
108
109 tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
110 CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
111
112 /* Write table description header */
113 { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
114 op += hSize;
115 }
116
117 /* Compress */
118 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
119 { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
120 if (cSize == 0) return 0; /* not enough space for compressed data */
121 op += cSize;
122 }
123
124 return op-ostart;
125}
126
127
128struct HUF_CElt_s {
129 U16 val;
130 BYTE nbBits;
131}; /* typedef'd to HUF_CElt within "huf.h" */
132
133/*! HUF_writeCTable() :
11fdf7f2 134 `CTable` : Huffman tree to save, using huf representation.
7c673cae
FG
135 @return : size of saved CTable */
136size_t HUF_writeCTable (void* dst, size_t maxDstSize,
9f95a23c 137 const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
7c673cae
FG
138{
139 BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
140 BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
141 BYTE* op = (BYTE*)dst;
142 U32 n;
143
144 /* check conditions */
145 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
146
147 /* convert to weight */
148 bitsToWeight[0] = 0;
149 for (n=1; n<huffLog+1; n++)
150 bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
151 for (n=0; n<maxSymbolValue; n++)
152 huffWeight[n] = bitsToWeight[CTable[n].nbBits];
153
154 /* attempt weights compression by FSE */
155 { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
156 if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
157 op[0] = (BYTE)hSize;
158 return hSize+1;
159 } }
160
161 /* write raw values as 4-bits (max : 15) */
162 if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
163 if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
164 op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
165 huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
166 for (n=0; n<maxSymbolValue; n+=2)
167 op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
168 return ((maxSymbolValue+1)/2) + 1;
169}
170
171
9f95a23c 172size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
7c673cae
FG
173{
174 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
175 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
176 U32 tableLog = 0;
177 U32 nbSymbols = 0;
178
179 /* get symbol weights */
180 CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
181
182 /* check result */
183 if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
11fdf7f2 184 if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
7c673cae
FG
185
186 /* Prepare base value per rank */
187 { U32 n, nextRankStart = 0;
188 for (n=1; n<=tableLog; n++) {
189 U32 current = nextRankStart;
190 nextRankStart += (rankVal[n] << (n-1));
191 rankVal[n] = current;
192 } }
193
194 /* fill nbBits */
195 { U32 n; for (n=0; n<nbSymbols; n++) {
196 const U32 w = huffWeight[n];
197 CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
198 } }
199
200 /* fill val */
201 { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
202 U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
203 { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
204 /* determine stating value per rank */
205 valPerRank[tableLog+1] = 0; /* for w==0 */
206 { U16 min = 0;
207 U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */
208 valPerRank[n] = min; /* get starting value within each rank */
209 min += nbPerRank[n];
210 min >>= 1;
211 } }
212 /* assign value within rank, symbol order */
11fdf7f2 213 { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
7c673cae
FG
214 }
215
11fdf7f2 216 *maxSymbolValuePtr = nbSymbols - 1;
7c673cae
FG
217 return readSize;
218}
219
9f95a23c
TL
220U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
221{
222 const HUF_CElt* table = (const HUF_CElt*)symbolTable;
223 assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
224 return table[symbolValue].nbBits;
225}
226
7c673cae
FG
227
228typedef struct nodeElt_s {
229 U32 count;
230 U16 parent;
231 BYTE byte;
232 BYTE nbBits;
233} nodeElt;
234
235static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
236{
237 const U32 largestBits = huffNode[lastNonNull].nbBits;
238 if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
239
240 /* there are several too large elements (at least >= 2) */
241 { int totalCost = 0;
242 const U32 baseCost = 1 << (largestBits - maxNbBits);
243 U32 n = lastNonNull;
244
245 while (huffNode[n].nbBits > maxNbBits) {
246 totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
247 huffNode[n].nbBits = (BYTE)maxNbBits;
248 n --;
249 } /* n stops at huffNode[n].nbBits <= maxNbBits */
250 while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
251
252 /* renorm totalCost */
253 totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
254
255 /* repay normalized cost */
256 { U32 const noSymbol = 0xF0F0F0F0;
257 U32 rankLast[HUF_TABLELOG_MAX+2];
258 int pos;
259
260 /* Get pos of last (smallest) symbol per rank */
261 memset(rankLast, 0xF0, sizeof(rankLast));
262 { U32 currentNbBits = maxNbBits;
263 for (pos=n ; pos >= 0; pos--) {
264 if (huffNode[pos].nbBits >= currentNbBits) continue;
265 currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
266 rankLast[maxNbBits-currentNbBits] = pos;
267 } }
268
269 while (totalCost > 0) {
270 U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
271 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
272 U32 highPos = rankLast[nBitsToDecrease];
273 U32 lowPos = rankLast[nBitsToDecrease-1];
274 if (highPos == noSymbol) continue;
275 if (lowPos == noSymbol) break;
276 { U32 const highTotal = huffNode[highPos].count;
277 U32 const lowTotal = 2 * huffNode[lowPos].count;
278 if (highTotal <= lowTotal) break;
279 } }
280 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
11fdf7f2
TL
281 /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
282 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
7c673cae
FG
283 nBitsToDecrease ++;
284 totalCost -= 1 << (nBitsToDecrease-1);
285 if (rankLast[nBitsToDecrease-1] == noSymbol)
286 rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
287 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
288 if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
289 rankLast[nBitsToDecrease] = noSymbol;
290 else {
291 rankLast[nBitsToDecrease]--;
292 if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
293 rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
294 } } /* while (totalCost > 0) */
295
296 while (totalCost < 0) { /* Sometimes, cost correction overshoot */
297 if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
298 while (huffNode[n].nbBits == maxNbBits) n--;
299 huffNode[n+1].nbBits--;
300 rankLast[1] = n+1;
301 totalCost++;
302 continue;
303 }
304 huffNode[ rankLast[1] + 1 ].nbBits--;
305 rankLast[1]++;
306 totalCost ++;
307 } } } /* there are several too large elements (at least >= 2) */
308
309 return maxNbBits;
310}
311
312
313typedef struct {
314 U32 base;
315 U32 current;
316} rankPos;
317
9f95a23c 318static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
7c673cae
FG
319{
320 rankPos rank[32];
321 U32 n;
322
323 memset(rank, 0, sizeof(rank));
324 for (n=0; n<=maxSymbolValue; n++) {
325 U32 r = BIT_highbit32(count[n] + 1);
326 rank[r].base ++;
327 }
328 for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
329 for (n=0; n<32; n++) rank[n].current = rank[n].base;
330 for (n=0; n<=maxSymbolValue; n++) {
331 U32 const c = count[n];
332 U32 const r = BIT_highbit32(c+1) + 1;
333 U32 pos = rank[r].current++;
9f95a23c
TL
334 while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
335 huffNode[pos] = huffNode[pos-1];
336 pos--;
337 }
7c673cae
FG
338 huffNode[pos].count = c;
339 huffNode[pos].byte = (BYTE)n;
340 }
341}
342
343
344/** HUF_buildCTable_wksp() :
345 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
9f95a23c 346 * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
7c673cae
FG
347 */
348#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
9f95a23c
TL
349typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
350size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
7c673cae
FG
351{
352 nodeElt* const huffNode0 = (nodeElt*)workSpace;
353 nodeElt* const huffNode = huffNode0+1;
354 U32 n, nonNullRank;
355 int lowS, lowN;
356 U16 nodeNb = STARTNODE;
357 U32 nodeRoot;
358
359 /* safety checks */
9f95a23c
TL
360 if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
361 if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
7c673cae 362 if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
9f95a23c 363 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
7c673cae
FG
364 memset(huffNode0, 0, sizeof(huffNodeTable));
365
366 /* sort, decreasing order */
367 HUF_sort(huffNode, count, maxSymbolValue);
368
369 /* init for parents */
370 nonNullRank = maxSymbolValue;
371 while(huffNode[nonNullRank].count == 0) nonNullRank--;
372 lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
373 huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
374 huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
375 nodeNb++; lowS-=2;
376 for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
377 huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
378
379 /* create parents */
380 while (nodeNb <= nodeRoot) {
381 U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
382 U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
383 huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
384 huffNode[n1].parent = huffNode[n2].parent = nodeNb;
385 nodeNb++;
386 }
387
388 /* distribute weights (unlimited tree height) */
389 huffNode[nodeRoot].nbBits = 0;
390 for (n=nodeRoot-1; n>=STARTNODE; n--)
391 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
392 for (n=0; n<=nonNullRank; n++)
393 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
394
395 /* enforce maxTableLog */
396 maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
397
398 /* fill result into tree (val, nbBits) */
399 { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
400 U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
401 if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
402 for (n=0; n<=nonNullRank; n++)
403 nbPerRank[huffNode[n].nbBits]++;
404 /* determine stating value per rank */
405 { U16 min = 0;
406 for (n=maxNbBits; n>0; n--) {
407 valPerRank[n] = min; /* get starting value within each rank */
408 min += nbPerRank[n];
409 min >>= 1;
410 } }
411 for (n=0; n<=maxSymbolValue; n++)
412 tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
413 for (n=0; n<=maxSymbolValue; n++)
414 tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
415 }
416
417 return maxNbBits;
418}
419
420/** HUF_buildCTable() :
9f95a23c 421 * @return : maxNbBits
7c673cae
FG
422 * Note : count is used before tree is written, so they can safely overlap
423 */
9f95a23c 424size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
7c673cae
FG
425{
426 huffNodeTable nodeTable;
427 return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
428}
429
11fdf7f2
TL
430static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
431{
432 size_t nbBits = 0;
433 int s;
434 for (s = 0; s <= (int)maxSymbolValue; ++s) {
435 nbBits += CTable[s].nbBits * count[s];
436 }
437 return nbBits >> 3;
438}
439
440static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
441 int bad = 0;
442 int s;
443 for (s = 0; s <= (int)maxSymbolValue; ++s) {
444 bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
445 }
446 return !bad;
447}
448
9f95a23c
TL
449size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
450
451FORCE_INLINE_TEMPLATE void
452HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
7c673cae
FG
453{
454 BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
455}
456
11fdf7f2 457#define HUF_FLUSHBITS(s) BIT_flushBits(s)
7c673cae
FG
458
459#define HUF_FLUSHBITS_1(stream) \
460 if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
461
462#define HUF_FLUSHBITS_2(stream) \
463 if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
464
9f95a23c
TL
465FORCE_INLINE_TEMPLATE size_t
466HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
467 const void* src, size_t srcSize,
468 const HUF_CElt* CTable)
7c673cae
FG
469{
470 const BYTE* ip = (const BYTE*) src;
471 BYTE* const ostart = (BYTE*)dst;
472 BYTE* const oend = ostart + dstSize;
473 BYTE* op = ostart;
474 size_t n;
7c673cae
FG
475 BIT_CStream_t bitC;
476
477 /* init */
478 if (dstSize < 8) return 0; /* not enough space to compress */
479 { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
480 if (HUF_isError(initErr)) return 0; }
481
482 n = srcSize & ~3; /* join to mod 4 */
483 switch (srcSize & 3)
484 {
485 case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
486 HUF_FLUSHBITS_2(&bitC);
11fdf7f2 487 /* fall-through */
7c673cae
FG
488 case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
489 HUF_FLUSHBITS_1(&bitC);
11fdf7f2 490 /* fall-through */
7c673cae
FG
491 case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
492 HUF_FLUSHBITS(&bitC);
11fdf7f2
TL
493 /* fall-through */
494 case 0 : /* fall-through */
495 default: break;
7c673cae
FG
496 }
497
498 for (; n>0; n-=4) { /* note : n&3==0 at this stage */
499 HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
500 HUF_FLUSHBITS_1(&bitC);
501 HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
502 HUF_FLUSHBITS_2(&bitC);
503 HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
504 HUF_FLUSHBITS_1(&bitC);
505 HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
506 HUF_FLUSHBITS(&bitC);
507 }
508
509 return BIT_closeCStream(&bitC);
510}
511
9f95a23c 512#if DYNAMIC_BMI2
7c673cae 513
9f95a23c
TL
514static TARGET_ATTRIBUTE("bmi2") size_t
515HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
516 const void* src, size_t srcSize,
517 const HUF_CElt* CTable)
518{
519 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
520}
521
522static size_t
523HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
524 const void* src, size_t srcSize,
525 const HUF_CElt* CTable)
526{
527 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
528}
529
530static size_t
531HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
532 const void* src, size_t srcSize,
533 const HUF_CElt* CTable, const int bmi2)
534{
535 if (bmi2) {
536 return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
537 }
538 return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
539}
540
541#else
542
543static size_t
544HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
545 const void* src, size_t srcSize,
546 const HUF_CElt* CTable, const int bmi2)
547{
548 (void)bmi2;
549 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
550}
551
552#endif
553
554size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
555{
556 return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
557}
558
559
560static size_t
561HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
562 const void* src, size_t srcSize,
563 const HUF_CElt* CTable, int bmi2)
7c673cae
FG
564{
565 size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
566 const BYTE* ip = (const BYTE*) src;
567 const BYTE* const iend = ip + srcSize;
568 BYTE* const ostart = (BYTE*) dst;
569 BYTE* const oend = ostart + dstSize;
570 BYTE* op = ostart;
571
572 if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */
573 if (srcSize < 12) return 0; /* no saving possible : too small input */
574 op += 6; /* jumpTable */
575
9f95a23c 576 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
7c673cae 577 if (cSize==0) return 0;
9f95a23c 578 assert(cSize <= 65535);
7c673cae
FG
579 MEM_writeLE16(ostart, (U16)cSize);
580 op += cSize;
581 }
582
583 ip += segmentSize;
9f95a23c 584 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
7c673cae 585 if (cSize==0) return 0;
9f95a23c 586 assert(cSize <= 65535);
7c673cae
FG
587 MEM_writeLE16(ostart+2, (U16)cSize);
588 op += cSize;
589 }
590
591 ip += segmentSize;
9f95a23c 592 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
7c673cae 593 if (cSize==0) return 0;
9f95a23c 594 assert(cSize <= 65535);
7c673cae
FG
595 MEM_writeLE16(ostart+4, (U16)cSize);
596 op += cSize;
597 }
598
599 ip += segmentSize;
9f95a23c 600 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
7c673cae
FG
601 if (cSize==0) return 0;
602 op += cSize;
603 }
604
605 return op-ostart;
606}
607
9f95a23c
TL
608size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
609{
610 return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
611}
612
613typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
7c673cae 614
11fdf7f2
TL
615static size_t HUF_compressCTable_internal(
616 BYTE* const ostart, BYTE* op, BYTE* const oend,
617 const void* src, size_t srcSize,
9f95a23c 618 HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
11fdf7f2 619{
9f95a23c
TL
620 size_t const cSize = (nbStreams==HUF_singleStream) ?
621 HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
622 HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
11fdf7f2
TL
623 if (HUF_isError(cSize)) { return cSize; }
624 if (cSize==0) { return 0; } /* uncompressible */
625 op += cSize;
626 /* check compressibility */
627 if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
628 return op-ostart;
629}
630
9f95a23c
TL
631typedef struct {
632 unsigned count[HUF_SYMBOLVALUE_MAX + 1];
633 HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
634 huffNodeTable nodeTable;
635} HUF_compress_tables_t;
11fdf7f2 636
9f95a23c
TL
637/* HUF_compress_internal() :
638 * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
639static size_t
640HUF_compress_internal (void* dst, size_t dstSize,
641 const void* src, size_t srcSize,
642 unsigned maxSymbolValue, unsigned huffLog,
643 HUF_nbStreams_e nbStreams,
644 void* workSpace, size_t wkspSize,
645 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
646 const int bmi2)
7c673cae 647{
9f95a23c 648 HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
7c673cae
FG
649 BYTE* const ostart = (BYTE*)dst;
650 BYTE* const oend = ostart + dstSize;
651 BYTE* op = ostart;
652
7c673cae 653 /* checks & inits */
9f95a23c
TL
654 if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
655 if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
656 if (!srcSize) return 0; /* Uncompressed */
657 if (!dstSize) return 0; /* cannot fit anything within dst budget */
7c673cae
FG
658 if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
659 if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
9f95a23c 660 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
7c673cae
FG
661 if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
662 if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
663
9f95a23c 664 /* Heuristic : If old table is valid, use it for small inputs */
11fdf7f2 665 if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
9f95a23c
TL
666 return HUF_compressCTable_internal(ostart, op, oend,
667 src, srcSize,
668 nbStreams, oldHufTable, bmi2);
11fdf7f2
TL
669 }
670
7c673cae 671 /* Scan input and build symbol stats */
9f95a23c 672 { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
7c673cae 673 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
9f95a23c 674 if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
7c673cae
FG
675 }
676
11fdf7f2 677 /* Check validity of previous table */
9f95a23c
TL
678 if ( repeat
679 && *repeat == HUF_repeat_check
680 && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
11fdf7f2
TL
681 *repeat = HUF_repeat_none;
682 }
683 /* Heuristic : use existing table for small inputs */
684 if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
9f95a23c
TL
685 return HUF_compressCTable_internal(ostart, op, oend,
686 src, srcSize,
687 nbStreams, oldHufTable, bmi2);
11fdf7f2
TL
688 }
689
7c673cae
FG
690 /* Build Huffman Tree */
691 huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
9f95a23c
TL
692 { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
693 maxSymbolValue, huffLog,
694 table->nodeTable, sizeof(table->nodeTable));
695 CHECK_F(maxBits);
7c673cae 696 huffLog = (U32)maxBits;
9f95a23c
TL
697 /* Zero unused symbols in CTable, so we can check it for validity */
698 memset(table->CTable + (maxSymbolValue + 1), 0,
699 sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
7c673cae
FG
700 }
701
702 /* Write table description header */
9f95a23c
TL
703 { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
704 /* Check if using previous huffman table is beneficial */
11fdf7f2 705 if (repeat && *repeat != HUF_repeat_none) {
9f95a23c
TL
706 size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
707 size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
11fdf7f2 708 if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
9f95a23c
TL
709 return HUF_compressCTable_internal(ostart, op, oend,
710 src, srcSize,
711 nbStreams, oldHufTable, bmi2);
712 } }
713
714 /* Use the new huffman table */
11fdf7f2 715 if (hSize + 12ul >= srcSize) { return 0; }
7c673cae 716 op += hSize;
11fdf7f2 717 if (repeat) { *repeat = HUF_repeat_none; }
9f95a23c
TL
718 if (oldHufTable)
719 memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
7c673cae 720 }
9f95a23c
TL
721 return HUF_compressCTable_internal(ostart, op, oend,
722 src, srcSize,
723 nbStreams, table->CTable, bmi2);
7c673cae
FG
724}
725
726
727size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
728 const void* src, size_t srcSize,
729 unsigned maxSymbolValue, unsigned huffLog,
730 void* workSpace, size_t wkspSize)
731{
9f95a23c
TL
732 return HUF_compress_internal(dst, dstSize, src, srcSize,
733 maxSymbolValue, huffLog, HUF_singleStream,
734 workSpace, wkspSize,
735 NULL, NULL, 0, 0 /*bmi2*/);
11fdf7f2
TL
736}
737
738size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
739 const void* src, size_t srcSize,
740 unsigned maxSymbolValue, unsigned huffLog,
741 void* workSpace, size_t wkspSize,
9f95a23c 742 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
11fdf7f2 743{
9f95a23c
TL
744 return HUF_compress_internal(dst, dstSize, src, srcSize,
745 maxSymbolValue, huffLog, HUF_singleStream,
746 workSpace, wkspSize, hufTable,
747 repeat, preferRepeat, bmi2);
7c673cae
FG
748}
749
750size_t HUF_compress1X (void* dst, size_t dstSize,
751 const void* src, size_t srcSize,
752 unsigned maxSymbolValue, unsigned huffLog)
753{
9f95a23c 754 unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
7c673cae
FG
755 return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
756}
757
9f95a23c
TL
758/* HUF_compress4X_repeat():
759 * compress input using 4 streams.
760 * provide workspace to generate compression tables */
7c673cae
FG
761size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
762 const void* src, size_t srcSize,
763 unsigned maxSymbolValue, unsigned huffLog,
764 void* workSpace, size_t wkspSize)
765{
9f95a23c
TL
766 return HUF_compress_internal(dst, dstSize, src, srcSize,
767 maxSymbolValue, huffLog, HUF_fourStreams,
768 workSpace, wkspSize,
769 NULL, NULL, 0, 0 /*bmi2*/);
11fdf7f2
TL
770}
771
9f95a23c
TL
772/* HUF_compress4X_repeat():
773 * compress input using 4 streams.
774 * re-use an existing huffman compression table */
11fdf7f2
TL
775size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
776 const void* src, size_t srcSize,
777 unsigned maxSymbolValue, unsigned huffLog,
778 void* workSpace, size_t wkspSize,
9f95a23c 779 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
11fdf7f2 780{
9f95a23c
TL
781 return HUF_compress_internal(dst, dstSize, src, srcSize,
782 maxSymbolValue, huffLog, HUF_fourStreams,
783 workSpace, wkspSize,
784 hufTable, repeat, preferRepeat, bmi2);
7c673cae
FG
785}
786
787size_t HUF_compress2 (void* dst, size_t dstSize,
788 const void* src, size_t srcSize,
789 unsigned maxSymbolValue, unsigned huffLog)
790{
9f95a23c 791 unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
7c673cae
FG
792 return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
793}
794
795size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
796{
9f95a23c 797 return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
7c673cae 798}