]> git.proxmox.com Git - ceph.git/blame - ceph/src/zstd/tests/decodecorpus.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / zstd / tests / decodecorpus.c
CommitLineData
11fdf7f2
TL
1/*
2 * Copyright (c) 2017-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11#include <limits.h>
12#include <math.h>
13#include <stddef.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17#include <time.h>
18
19#include "zstd.h"
20#include "zstd_internal.h"
21#include "mem.h"
22#define ZDICT_STATIC_LINKING_ONLY
23#include "zdict.h"
24
25// Direct access to internal compression functions is required
26#include "zstd_compress.c"
27
28#define XXH_STATIC_LINKING_ONLY
29#include "xxhash.h" /* XXH64 */
30
31#ifndef MIN
32 #define MIN(a, b) ((a) < (b) ? (a) : (b))
33#endif
34
35#ifndef MAX_PATH
36 #ifdef PATH_MAX
37 #define MAX_PATH PATH_MAX
38 #else
39 #define MAX_PATH 256
40 #endif
41#endif
42
43/*-************************************
44* DISPLAY Macros
45**************************************/
46#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
47#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
48static U32 g_displayLevel = 2;
49
50#define DISPLAYUPDATE(...) \
51 do { \
52 if ((clockSpan(g_displayClock) > g_refreshRate) || \
53 (g_displayLevel >= 4)) { \
54 g_displayClock = clock(); \
55 DISPLAY(__VA_ARGS__); \
56 if (g_displayLevel >= 4) fflush(stderr); \
57 } \
58 } while (0)
59static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6;
60static clock_t g_displayClock = 0;
61
/* Returns the number of clock() ticks elapsed since `cStart`. */
static clock_t clockSpan(clock_t cStart)
{
    clock_t const now = clock();
    /* works even when overflow; max span ~ 30mn */
    return now - cStart;
}
66
/* Print a diagnostic and exit(1) when `code` is a zstd error code.
 * `code` is evaluated exactly once, so passing a function call is safe: the
 * call is not repeated when the error name is looked up. */
#define CHECKERR(code)                                              \
    do {                                                            \
        size_t const checkerr_code_ = (code);                       \
        if (ZSTD_isError(checkerr_code_)) {                         \
            DISPLAY("Error occurred while generating data: %s\n",   \
                    ZSTD_getErrorName(checkerr_code_));             \
            exit(1);                                                \
        }                                                           \
    } while (0)
75
76/*-*******************************************************
77* Random function
78*********************************************************/
79static unsigned RAND(unsigned* src)
80{
81#define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r)))
82 static const U32 prime1 = 2654435761U;
83 static const U32 prime2 = 2246822519U;
84 U32 rand32 = *src;
85 rand32 *= prime1;
86 rand32 += prime2;
87 rand32 = RAND_rotl32(rand32, 13);
88 *src = rand32;
89 return RAND_rotl32(rand32, 27);
90#undef RAND_rotl32
91}
92
/* Number of entries in a symbol distribution table (see RAND_genDist /
 * RAND_bufferDist, which index such tables modulo DISTSIZE) */
#define DISTSIZE (8192)
94
95/* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */
96static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
97{
98 size_t i;
99 BYTE* op = ptr;
100
101 for (i = 0; i < size; i++) {
102 op[i] = (BYTE) (RAND(seed) % (maxSymb + 1));
103 }
104}
105
106/* Write `size` random bytes into `ptr` */
107static void RAND_buffer(U32* seed, void* ptr, size_t size)
108{
109 size_t i;
110 BYTE* op = ptr;
111
112 for (i = 0; i + 4 <= size; i += 4) {
113 MEM_writeLE32(op + i, RAND(seed));
114 }
115 for (; i < size; i++) {
116 op[i] = RAND(seed) & 0xff;
117 }
118}
119
120/* Write `size` bytes into `ptr` following the distribution `dist` */
121static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size)
122{
123 size_t i;
124 BYTE* op = ptr;
125
126 for (i = 0; i < size; i++) {
127 op[i] = dist[RAND(seed) % DISTSIZE];
128 }
129}
130
131/* Generate a random distribution where the frequency of each symbol follows a
132 * geometric distribution defined by `weight`
133 * `dist` should have size at least `DISTSIZE` */
134static void RAND_genDist(U32* seed, BYTE* dist, double weight)
135{
136 size_t i = 0;
137 size_t statesLeft = DISTSIZE;
138 BYTE symb = (BYTE) (RAND(seed) % 256);
139 BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
140
141 while (i < DISTSIZE) {
142 size_t states = ((size_t)(weight * statesLeft)) + 1;
143 size_t j;
144 for (j = 0; j < states && i < DISTSIZE; j++, i++) {
145 dist[i] = symb;
146 }
147
148 symb += step;
149 statesLeft -= states;
150 }
151}
152
153/* Generates a random number in the range [min, max) */
154static inline U32 RAND_range(U32* seed, U32 min, U32 max)
155{
156 return (RAND(seed) % (max-min)) + min;
157}
158
/* Round a non-negative floating-point value to the nearest U32.
 * The argument is parenthesized so that compound expressions (e.g. `a ? b : c`)
 * are rounded as a whole instead of binding to `+ 0.5` by precedence. */
#define ROUND(x) ((U32)((x) + 0.5))
160
161/* Generates a random number in an exponential distribution with mean `mean` */
162static double RAND_exp(U32* seed, double mean)
163{
164 double const u = RAND(seed) / (double) UINT_MAX;
165 return log(1-u) * (-mean);
166}
167
168/*-*******************************************************
169* Constants and Structs
170*********************************************************/
/* Human-readable names of the block types */
const char *BLOCK_TYPES[] = {"raw", "rle", "compressed"};

/* Upper limit on generated content: 2^20 bytes */
#define MAX_DECOMPRESSED_SIZE_LOG 20
#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)

#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */

/* Shortest match length used by the generator, and the resulting upper bound
 * on the number of sequences in a block (block size divided by MIN_SEQ_LEN,
 * rounded up) */
#define MIN_SEQ_LEN (3)
#define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)

/* Statically sized working buffers. LITERAL_BUFFER receives the literals
 * produced by the literals-block writers and is read back when sequences are
 * generated. */
BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE];
BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2];
BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX];

/* Backing storage attached to the seqStore_t by initSeqStore() */
seqDef SEQUENCE_BUFFER[MAX_NB_SEQ];
BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */
BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];

/* Scratch workspace handed to the FSE / HUF `_wksp` helpers */
unsigned WKSP[1024];
192
/* The parts of a generated frame header that later stages need */
typedef struct {
    size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
    unsigned windowSize; /* contentSize >= windowSize means single segment */
} frameHeader_t;
197
/* For repeat modes: entropy state carried from one compressed block to the
 * next so that a block may reuse the previous block's tables and offsets */
typedef struct {
    /* repeat-offset history, most recent first */
    U32 rep[ZSTD_REP_NUM];

    /* non-zero once hufDist / hufTable hold a usable Huffman table */
    int hufInit;
    /* the distribution used in the previous block for repeat mode */
    BYTE hufDist[DISTSIZE];
    U32 hufTable [256]; /* HUF_CElt is an incomplete type */

    /* non-zero once the FSE tables below have been built at least once */
    int fseInit;
    FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];

    /* Symbols that were present in the previous distribution, for use with
     * set_repeat. Each array is indexed by symbol code and holds 1 for codes
     * that were present; the sizes are the largest code used for that table
     * in this file (35, 28, 52) plus one. */
    BYTE litlengthSymbolSet[36];
    BYTE offsetSymbolSet[29];
    BYTE matchlengthSymbolSet[53];
} cblockStats_t;
218
/* State of the frame currently being generated */
typedef struct {
    /* compressed output: `data` is the current write position (each writer
     * advances it), `dataEnd` is used as the limit of the output space.
     * NOTE(review): `dataStart` presumably marks the start of the output
     * buffer - it is not used in the code visible here; confirm. */
    void* data;
    void* dataStart;
    void* dataEnd;

    /* decompressed content: `srcStart` is where the frame's content begins and
     * `src` is where the content of the block being generated starts.
     * NOTE(review): `srcEnd` is not used in the code visible here; confirm. */
    void* src;
    void* srcStart;
    void* srcEnd;

    frameHeader_t header;

    cblockStats_t stats;
    cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
} frame_t;
233
/* Description of the dictionary (if any) that generated frames refer to */
typedef struct {
    int useDict;            /* non-zero when a dictionary is in use */
    U32 dictID;             /* written into the frame header when useDict is set */
    size_t dictContentSize; /* number of bytes available at dictContent */
    BYTE* dictContent;      /* dictionary content that match offsets may reach into */
} dictInfo;
240
/* What kind of output the generator produces */
typedef enum {
    gt_frame = 0, /* generate frames */
    gt_block, /* generate compressed blocks without block/frame headers */
} genType_e;
245
246/*-*******************************************************
247* Global variables (set from command line)
248*********************************************************/
/* log2 of the largest content size a generated frame may have */
U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG; /* <= 20 */
/* largest block size, in bytes, used when sizing literals */
U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */
251
252/*-*******************************************************
253* Generator Functions
254*********************************************************/
255
/* Generation options. When `contentSize` is non-zero, writeFrameHeader()
 * always emits the frame content size field instead of only sometimes. */
struct {
    int contentSize; /* force the content size to be present */
} opts; /* advanced options on generation */
259
/* Generate and write a random frame header.
 *
 * seed  : random generator state, advanced by every draw
 * frame : the header is written at frame->data; on return frame->data points
 *         just past the header and frame->header holds the chosen content and
 *         window sizes
 * info  : when info.useDict is set, a 4-byte dictionary ID is written
 *
 * The order of the RAND() draws determines the output for a given seed. */
static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
{
    BYTE* const op = frame->data;
    size_t pos = 0;
    frameHeader_t fh;

    BYTE windowByte = 0;

    int singleSegment = 0;
    int contentSizeFlag = 0;
    int fcsCode = 0;

    memset(&fh, 0, sizeof(fh));

    /* generate window size */
    {
        /* Follow window algorithm from specification */
        int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
        int const mantissa = RAND(seed) % 8;
        windowByte = (BYTE) ((exponent << 3) | mantissa);
        /* window = 2^(10+exponent), plus `mantissa` eighths of that */
        fh.windowSize = (1U << (exponent + 10));
        fh.windowSize += fh.windowSize / 8 * mantissa;
    }

    {
        /* Generate random content size */
        size_t highBit;
        if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) {
            /* do content of at least 128 bytes */
            highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog);
        } else if (RAND(seed) & 3) {
            /* do small content */
            /* NOTE(review): the upper bound is MIN(7, 1 << log), not
             * MIN(7, log); for very small g_maxDecompressedSizeLog this allows
             * sizes above 2^log - confirm this is intended */
            highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog));
        } else {
            /* 0 size frame */
            highBit = 0;
        }
        /* highBit fixes the magnitude; the lower bits are random */
        fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0;

        /* provide size sometimes */
        contentSizeFlag = opts.contentSize | (RAND(seed) & 1);

        if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
            /* do single segment sometimes */
            fh.windowSize = (U32) fh.contentSize;
            singleSegment = 1;
        }
    }

    if (contentSizeFlag) {
        /* Determine how large fcs field has to be */
        int minFcsCode = (fh.contentSize >= 256) +
                         (fh.contentSize >= 65536 + 256) +
                         (fh.contentSize > 0xFFFFFFFFU);
        /* code 0 is only used together with the single segment flag */
        if (!singleSegment && !minFcsCode) {
            minFcsCode = 1;
        }
        fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode));
        /* code 1 stores (contentSize - 256), so it cannot hold sizes below 256 */
        if (fcsCode == 1 && fh.contentSize < 256) fcsCode++;
    }

    /* write out the header */
    MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
    pos += 4;

    {
        /*
         * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6)
         * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5)
         * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2)
         * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0)
         * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
         */
        int const dictBits = info.useDict ? 3 : 0;
        BYTE const frameHeaderDescriptor =
                (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
        op[pos++] = frameHeaderDescriptor;
    }

    /* the window descriptor byte is only written when not single segment */
    if (!singleSegment) {
        op[pos++] = windowByte;
    }
    if (info.useDict) {
        MEM_writeLE32(op + pos, (U32) info.dictID);
        pos += 4;
    }
    if (contentSizeFlag) {
        /* field width is 1, 2, 4 or 8 bytes for fcsCode 0..3 */
        switch (fcsCode) {
        default: /* Impossible */
        case 0: op[pos++] = (BYTE) fh.contentSize; break;
        case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break;
        case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break;
        case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break;
        }
    }

    DISPLAYLEVEL(3, " frame content size:\t%u\n", (U32)fh.contentSize);
    DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize);
    DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag);
    DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment);

    /* publish the new write position and the chosen header values */
    frame->data = op + pos;
    frame->header = fh;
}
365
366/* Write a literal block in either raw or RLE form, return the literals size */
367static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
368{
369 BYTE* op = (BYTE*)frame->data;
370 int const type = RAND(seed) % 2;
371 int const sizeFormatDesc = RAND(seed) % 8;
372 size_t litSize;
373 size_t maxLitSize = MIN(contentSize, g_maxBlockSize);
374
375 if (sizeFormatDesc == 0) {
376 /* Size_FormatDesc = ?0 */
377 maxLitSize = MIN(maxLitSize, 31);
378 } else if (sizeFormatDesc <= 4) {
379 /* Size_FormatDesc = 01 */
380 maxLitSize = MIN(maxLitSize, 4095);
381 } else {
382 /* Size_Format = 11 */
383 maxLitSize = MIN(maxLitSize, 1048575);
384 }
385
386 litSize = RAND(seed) % (maxLitSize + 1);
387 if (frame->src == frame->srcStart && litSize == 0) {
388 litSize = 1; /* no empty literals if there's nothing preceding this block */
389 }
390 if (litSize + 3 > contentSize) {
391 litSize = contentSize; /* no matches shorter than 3 are allowed */
392 }
393 /* use smallest size format that fits */
394 if (litSize < 32) {
395 op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff;
396 op += 1;
397 } else if (litSize < 4096) {
398 op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff;
399 op[1] = (litSize >> 4) & 0xff;
400 op += 2;
401 } else {
402 op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff;
403 op[1] = (litSize >> 4) & 0xff;
404 op[2] = (litSize >> 12) & 0xff;
405 op += 3;
406 }
407
408 if (type == 0) {
409 /* Raw literals */
410 DISPLAYLEVEL(4, " raw literals\n");
411
412 RAND_buffer(seed, LITERAL_BUFFER, litSize);
413 memcpy(op, LITERAL_BUFFER, litSize);
414 op += litSize;
415 } else {
416 /* RLE literals */
417 BYTE const symb = (BYTE) (RAND(seed) % 256);
418
419 DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (U32)symb);
420
421 memset(LITERAL_BUFFER, symb, litSize);
422 op[0] = symb;
423 op++;
424 }
425
426 frame->data = op;
427
428 return litSize;
429}
430
431/* Generate a Huffman header for the given source */
432static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize,
433 const void* src, size_t srcSize)
434{
435 BYTE* const ostart = (BYTE*)dst;
436 BYTE* op = ostart;
437
438 unsigned huffLog = 11;
439 U32 maxSymbolValue = 255;
440
441 U32 count[HUF_SYMBOLVALUE_MAX+1];
442
443 /* Scan input and build symbol stats */
444 { size_t const largest = FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP);
445 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */
446 if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
447 }
448
449 /* Build Huffman Tree */
450 /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
451 huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1);
452 DISPLAYLEVEL(6, " huffman log: %u\n", huffLog);
453 { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
454 CHECKERR(maxBits);
455 huffLog = (U32)maxBits;
456 }
457
458 /* Write table description header */
459 { size_t const hSize = HUF_writeCTable (op, dstSize, hufTable, maxSymbolValue, huffLog);
460 if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */
461 op += hSize;
462 }
463
464 return op - ostart;
465}
466
467/* Write a Huffman coded literals block and return the literals size */
468static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize)
469{
470 BYTE* origop = (BYTE*)frame->data;
471 BYTE* opend = (BYTE*)frame->dataEnd;
472 BYTE* op;
473 BYTE* const ostart = origop;
474 int const sizeFormat = RAND(seed) % 4;
475 size_t litSize;
476 size_t hufHeaderSize = 0;
477 size_t compressedSize = 0;
478 size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize);
479
480 symbolEncodingType_e hType;
481
482 if (contentSize < 64) {
483 /* make sure we get reasonably-sized literals for compression */
484 return ERROR(GENERIC);
485 }
486
487 DISPLAYLEVEL(4, " compressed literals\n");
488
489 switch (sizeFormat) {
490 case 0: /* fall through, size is the same as case 1 */
491 case 1:
492 maxLitSize = MIN(maxLitSize, 1023);
493 origop += 3;
494 break;
495 case 2:
496 maxLitSize = MIN(maxLitSize, 16383);
497 origop += 4;
498 break;
499 case 3:
500 maxLitSize = MIN(maxLitSize, 262143);
501 origop += 5;
502 break;
503 default:; /* impossible */
504 }
505
506 do {
507 op = origop;
508 do {
509 litSize = RAND(seed) % (maxLitSize + 1);
510 } while (litSize < 32); /* avoid small literal sizes */
511 if (litSize + 3 > contentSize) {
512 litSize = contentSize; /* no matches shorter than 3 are allowed */
513 }
514
515 /* most of the time generate a new distribution */
516 if ((RAND(seed) & 3) || !frame->stats.hufInit) {
517 do {
518 if (RAND(seed) & 3) {
519 /* add 10 to ensure some compressability */
520 double const weight = ((RAND(seed) % 90) + 10) / 100.0;
521
522 DISPLAYLEVEL(5, " distribution weight: %d%%\n",
523 (int)(weight * 100));
524
525 RAND_genDist(seed, frame->stats.hufDist, weight);
526 } else {
527 /* sometimes do restricted range literals to force
528 * non-huffman headers */
529 DISPLAYLEVEL(5, " small range literals\n");
530 RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE,
531 15);
532 }
533 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
534 litSize);
535
536 /* generate the header from the distribution instead of the
537 * actual data to avoid bugs with symbols that were in the
538 * distribution but never showed up in the output */
539 hufHeaderSize = writeHufHeader(
540 seed, (HUF_CElt*)frame->stats.hufTable, op, opend - op,
541 frame->stats.hufDist, DISTSIZE);
542 CHECKERR(hufHeaderSize);
543 /* repeat until a valid header is written */
544 } while (hufHeaderSize == 0);
545 op += hufHeaderSize;
546 hType = set_compressed;
547
548 frame->stats.hufInit = 1;
549 } else {
550 /* repeat the distribution/table from last time */
551 DISPLAYLEVEL(5, " huffman repeat stats\n");
552 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
553 litSize);
554 hufHeaderSize = 0;
555 hType = set_repeat;
556 }
557
558 do {
559 compressedSize =
560 sizeFormat == 0
561 ? HUF_compress1X_usingCTable(
562 op, opend - op, LITERAL_BUFFER, litSize,
563 (HUF_CElt*)frame->stats.hufTable)
564 : HUF_compress4X_usingCTable(
565 op, opend - op, LITERAL_BUFFER, litSize,
566 (HUF_CElt*)frame->stats.hufTable);
567 CHECKERR(compressedSize);
568 /* this only occurs when it could not compress or similar */
569 } while (compressedSize <= 0);
570
571 op += compressedSize;
572
573 compressedSize += hufHeaderSize;
574 DISPLAYLEVEL(5, " regenerated size: %u\n", (U32)litSize);
575 DISPLAYLEVEL(5, " compressed size: %u\n", (U32)compressedSize);
576 if (compressedSize >= litSize) {
577 DISPLAYLEVEL(5, " trying again\n");
578 /* if we have to try again, reset the stats so we don't accidentally
579 * try to repeat a distribution we just made */
580 frame->stats = frame->oldStats;
581 } else {
582 break;
583 }
584 } while (1);
585
586 /* write header */
587 switch (sizeFormat) {
588 case 0: /* fall through, size is the same as case 1 */
589 case 1: {
590 U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
591 ((U32)compressedSize << 14);
592 MEM_writeLE24(ostart, header);
593 break;
594 }
595 case 2: {
596 U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
597 ((U32)compressedSize << 18);
598 MEM_writeLE32(ostart, header);
599 break;
600 }
601 case 3: {
602 U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
603 ((U32)compressedSize << 22);
604 MEM_writeLE32(ostart, header);
605 ostart[4] = (BYTE)(compressedSize >> 10);
606 break;
607 }
608 default:; /* impossible */
609 }
610
611 frame->data = op;
612 return litSize;
613}
614
615static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
616{
617 /* only do compressed for larger segments to avoid compressibility issues */
618 if (RAND(seed) & 7 && contentSize >= 64) {
619 return writeLiteralsBlockCompressed(seed, frame, contentSize);
620 } else {
621 return writeLiteralsBlockSimple(seed, frame, contentSize);
622 }
623}
624
625static inline void initSeqStore(seqStore_t *seqStore) {
626 seqStore->sequencesStart = SEQUENCE_BUFFER;
627 seqStore->litStart = SEQUENCE_LITERAL_BUFFER;
628 seqStore->llCode = SEQUENCE_LLCODE;
629 seqStore->mlCode = SEQUENCE_MLCODE;
630 seqStore->ofCode = SEQUENCE_OFCODE;
631
632 ZSTD_resetSeqStore(seqStore);
633}
634
/* Randomly generate sequence commands.
 *
 * Splits a block of `contentSize` bytes into `literalsSize` bytes of literals
 * (taken from LITERAL_BUFFER) and matches covering the rest, writes the
 * resulting decompressed content at frame->src, records every sequence in
 * `seqStore`, and updates the repeat-offset history in frame->stats.rep.
 *
 * seed         : random generator state; the order of draws fixes the output
 * frame        : supplies src/srcStart, the window size and the rep history
 * seqStore     : receives the sequences through ZSTD_storeSeq()
 * contentSize  : total decompressed size of the block
 * literalsSize : number of literals available in LITERAL_BUFFER
 * info         : when info.useDict is set, offsets may reach into the
 *                dictionary content
 *
 * Returns the number of sequences generated. */
static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
                             size_t contentSize, size_t literalsSize, dictInfo info)
{
    /* The total length of all the matches */
    size_t const remainingMatch = contentSize - literalsSize;
    size_t excessMatch = 0;
    U32 numSequences = 0;

    U32 i;


    const BYTE* literals = LITERAL_BUFFER;
    BYTE* srcPtr = frame->src;

    if (literalsSize != contentSize) {
        /* each match must be at least MIN_SEQ_LEN, so this is the maximum
         * number of sequences we can have */
        /* NOTE(review): if 0 < remainingMatch < MIN_SEQ_LEN, maxSequences is 0
         * and the modulo below divides by zero; the literals writers in this
         * file appear to prevent that by never leaving fewer than 3 bytes for
         * matches - confirm for other callers */
        U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN;
        numSequences = (RAND(seed) % maxSequences) + 1;

        /* the extra match lengths we have to allocate to each sequence */
        excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
    }

    DISPLAYLEVEL(5, " total match lengths: %u\n", (U32)remainingMatch);
    for (i = 0; i < numSequences; i++) {
        /* Generate match and literal lengths by exponential distribution to
         * ensure nice numbers */
        U32 matchLen =
                MIN_SEQ_LEN +
                ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i)));
        /* one time in eight the sequence carries no literals at all */
        U32 literalLen =
                (RAND(seed) & 7)
                        ? ROUND(RAND_exp(seed,
                                         literalsSize /
                                                 (double)(numSequences - i)))
                        : 0;
        /* actual offset, code to send, and point to copy up to when shifting
         * codes in the repeat offsets history */
        U32 offset, offsetCode, repIndex;

        /* bounds checks */
        matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN);
        literalLen = MIN(literalLen, (U32) literalsSize);
        /* the very first sequence of the frame needs at least one literal,
         * since there is nothing before it to copy a match from */
        if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
        /* the last sequence absorbs whatever match length is still unassigned */
        if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch;

        memcpy(srcPtr, literals, literalLen);
        srcPtr += literalLen;
        /* pick an offset; redraw until it is non-zero and (without a
         * dictionary) does not reach before the start of the content */
        do {
            if (RAND(seed) & 7) {
                /* do a normal offset */
                U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart);
                offset = (RAND(seed) %
                          MIN(frame->header.windowSize,
                              (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
                         1;
                if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) {
                    /* need to occasionally generate offsets that go past the start */
                    /* including i+1 != numSequences because the last sequence has to adhere to predetermined contentSize */
                    U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1;
                    offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart;
                    if (offset > frame->header.windowSize) {
                        if (lenPastStart < MIN_SEQ_LEN) {
                            /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
                            /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
                            /* make sure lenPastStart does not go past dictionary start though */
                            lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
                            offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
                        }
                        {
                            U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
                            matchLen = MIN(matchLen, matchLenBound);
                        }
                    }
                }
                /* codes below ZSTD_REP_MOVE+1 are reserved for repeat offsets */
                offsetCode = offset + ZSTD_REP_MOVE;
                repIndex = 2;
            } else {
                /* do a repeat offset */
                offsetCode = RAND(seed) % 3;
                if (literalLen > 0) {
                    offset = frame->stats.rep[offsetCode];
                    repIndex = offsetCode;
                } else {
                    /* special case: with no literals the codes are shifted by
                     * one, and the last code means rep[0] - 1 */
                    offset = offsetCode == 2 ? frame->stats.rep[0] - 1
                                             : frame->stats.rep[offsetCode + 1];
                    repIndex = MIN(2, offsetCode + 1);
                }
            }
        } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);

        /* materialize the match byte by byte so overlapping copies behave
         * like a decoder would */
        {
            size_t j;
            BYTE* const dictEnd = info.dictContent + info.dictContentSize;
            for (j = 0; j < matchLen; j++) {
                if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
                    /* copy from dictionary instead of literals */
                    size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart);
                    *srcPtr = *(dictEnd - dictOffset);
                }
                else {
                    *srcPtr = *(srcPtr-offset);
                }
                srcPtr++;
            }
        }

        /* shift the history down to repIndex and put the new offset first */
        { int r;
            for (r = repIndex; r > 0; r--) {
                frame->stats.rep[r] = frame->stats.rep[r - 1];
            }
            frame->stats.rep[0] = offset;
        }

        DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", literalLen, offset, matchLen);
        DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
                     (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart), i);
        DISPLAYLEVEL(6, "\n");
        if (offsetCode < 3) {
            DISPLAYLEVEL(7, " repeat offset: %d\n", repIndex);
        }
        /* use libzstd sequence handling */
        ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode,
                      matchLen - MINMATCH);

        /* account for what this sequence consumed */
        literalsSize -= literalLen;
        excessMatch -= (matchLen - MIN_SEQ_LEN);
        literals += literalLen;
    }

    /* literals not attached to any sequence go at the end of the block */
    memcpy(srcPtr, literals, literalsSize);
    srcPtr += literalsSize;
    DISPLAYLEVEL(6, " excess literals: %5u", (U32)literalsSize);
    DISPLAYLEVEL(7, " srcPos: %8u", (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
    DISPLAYLEVEL(6, "\n");

    return numSequences;
}
776
777static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue)
778{
779 size_t i;
780
781 memset(set, 0, (size_t)maxSymbolValue+1);
782
783 for (i = 0; i < len; i++) {
784 set[symbols[i]] = 1;
785 }
786}
787
788static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue)
789{
790 size_t i;
791
792 for (i = 0; i < len; i++) {
793 if (symbols[i] > maxSymbolValue || !set[symbols[i]]) {
794 return 0;
795 }
796 }
797 return 1;
798}
799
800static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
801 size_t nbSeq)
802{
803 /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
804 U32 count[MaxSeq+1];
805 S16 norm[MaxSeq+1];
806 FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable;
807 FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable;
808 FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable;
809 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
810 const seqDef* const sequences = seqStorePtr->sequencesStart;
811 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
812 const BYTE* const llCodeTable = seqStorePtr->llCode;
813 const BYTE* const mlCodeTable = seqStorePtr->mlCode;
814 BYTE* const oend = (BYTE*)frame->dataEnd;
815 BYTE* op = (BYTE*)frame->data;
816 BYTE* seqHead;
817 BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
818
819 /* literals compressing block removed so that can be done separately */
820
821 /* Sequences Header */
822 if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
823 if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
824 else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
825 else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
826
827 /* seqHead : flags for FSE encoding type */
828 seqHead = op++;
829
830 if (nbSeq==0) {
831 frame->data = op;
832
833 return 0;
834 }
835
836 /* convert length/distances into codes */
837 ZSTD_seqToCodes(seqStorePtr);
838
839 /* CTable for Literal Lengths */
840 { U32 max = MaxLL;
841 size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP);
842 if (mostFrequent == nbSeq) {
843 /* do RLE if we have the chance */
844 *op++ = llCodeTable[0];
845 FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
846 LLtype = set_rle;
847 } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
848 isSymbolSubset(llCodeTable, nbSeq,
849 frame->stats.litlengthSymbolSet, 35)) {
850 /* maybe do repeat mode if we're allowed to */
851 LLtype = set_repeat;
852 } else if (!(RAND(seed) & 3)) {
853 /* maybe use the default distribution */
854 FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
855 LLtype = set_basic;
856 } else {
857 /* fall back on a full table */
858 size_t nbSeq_1 = nbSeq;
859 const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
860 if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
861 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
862 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
863 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
864 op += NCountSize; }
865 FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
866 LLtype = set_compressed;
867 } }
868
869 /* CTable for Offsets */
870 /* see Literal Lengths for descriptions of mode choices */
871 { U32 max = MaxOff;
872 size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP);
873 if (mostFrequent == nbSeq) {
874 *op++ = ofCodeTable[0];
875 FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
876 Offtype = set_rle;
877 } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
878 isSymbolSubset(ofCodeTable, nbSeq,
879 frame->stats.offsetSymbolSet, 28)) {
880 Offtype = set_repeat;
881 } else if (!(RAND(seed) & 3)) {
882 FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
883 Offtype = set_basic;
884 } else {
885 size_t nbSeq_1 = nbSeq;
886 const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
887 if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
888 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
889 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
890 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
891 op += NCountSize; }
892 FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
893 Offtype = set_compressed;
894 } }
895
896 /* CTable for MatchLengths */
897 /* see Literal Lengths for descriptions of mode choices */
898 { U32 max = MaxML;
899 size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP);
900 if (mostFrequent == nbSeq) {
901 *op++ = *mlCodeTable;
902 FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
903 MLtype = set_rle;
904 } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
905 isSymbolSubset(mlCodeTable, nbSeq,
906 frame->stats.matchlengthSymbolSet, 52)) {
907 MLtype = set_repeat;
908 } else if (!(RAND(seed) & 3)) {
909 /* sometimes do default distribution */
910 FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
911 MLtype = set_basic;
912 } else {
913 /* fall back on table */
914 size_t nbSeq_1 = nbSeq;
915 const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
916 if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
917 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
918 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
919 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
920 op += NCountSize; }
921 FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
922 MLtype = set_compressed;
923 } }
924 frame->stats.fseInit = 1;
925 initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35);
926 initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28);
927 initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52);
928
929 DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", LLtype, Offtype, MLtype);
930
931 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
932
933 /* Encoding Sequences */
934 { BIT_CStream_t blockStream;
935 FSE_CState_t stateMatchLength;
936 FSE_CState_t stateOffsetBits;
937 FSE_CState_t stateLitLength;
938
939 CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
940
941 /* first symbols */
942 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
943 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
944 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
945 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
946 if (MEM_32bits()) BIT_flushBits(&blockStream);
947 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
948 if (MEM_32bits()) BIT_flushBits(&blockStream);
949 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
950 BIT_flushBits(&blockStream);
951
952 { size_t n;
953 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
954 BYTE const llCode = llCodeTable[n];
955 BYTE const ofCode = ofCodeTable[n];
956 BYTE const mlCode = mlCodeTable[n];
957 U32 const llBits = LL_bits[llCode];
958 U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
959 U32 const mlBits = ML_bits[mlCode];
960 /* (7)*/ /* (7)*/
961 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
962 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
963 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
964 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
965 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
966 BIT_flushBits(&blockStream); /* (7)*/
967 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
968 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
969 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
970 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
971 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
972 BIT_flushBits(&blockStream); /* (7)*/
973 } }
974
975 FSE_flushCState(&blockStream, &stateMatchLength);
976 FSE_flushCState(&blockStream, &stateOffsetBits);
977 FSE_flushCState(&blockStream, &stateLitLength);
978
979 { size_t const streamSize = BIT_closeCStream(&blockStream);
980 if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
981 op += streamSize;
982 } }
983
984 frame->data = op;
985
986 return 0;
987}
988
989static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
990 size_t literalsSize, dictInfo info)
991{
992 seqStore_t seqStore;
993 size_t numSequences;
994
995
996 initSeqStore(&seqStore);
997
998 /* randomly generate sequences */
999 numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
1000 /* write them out to the frame data */
1001 CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
1002
1003 return numSequences;
1004}
1005
1006static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
1007{
1008 BYTE* const blockStart = (BYTE*)frame->data;
1009 size_t literalsSize;
1010 size_t nbSeq;
1011
1012 DISPLAYLEVEL(4, " compressed block:\n");
1013
1014 literalsSize = writeLiteralsBlock(seed, frame, contentSize);
1015
1016 DISPLAYLEVEL(4, " literals size: %u\n", (U32)literalsSize);
1017
1018 nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
1019
1020 DISPLAYLEVEL(4, " number of sequences: %u\n", (U32)nbSeq);
1021
1022 return (BYTE*)frame->data - blockStart;
1023}
1024
1025static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
1026 int lastBlock, dictInfo info)
1027{
1028 int const blockTypeDesc = RAND(seed) % 8;
1029 size_t blockSize;
1030 int blockType;
1031
1032 BYTE *const header = (BYTE*)frame->data;
1033 BYTE *op = header + 3;
1034
1035 DISPLAYLEVEL(4, " block:\n");
1036 DISPLAYLEVEL(4, " block content size: %u\n", (U32)contentSize);
1037 DISPLAYLEVEL(4, " last block: %s\n", lastBlock ? "yes" : "no");
1038
1039 if (blockTypeDesc == 0) {
1040 /* Raw data frame */
1041
1042 RAND_buffer(seed, frame->src, contentSize);
1043 memcpy(op, frame->src, contentSize);
1044
1045 op += contentSize;
1046 blockType = 0;
1047 blockSize = contentSize;
1048 } else if (blockTypeDesc == 1) {
1049 /* RLE */
1050 BYTE const symbol = RAND(seed) & 0xff;
1051
1052 op[0] = symbol;
1053 memset(frame->src, symbol, contentSize);
1054
1055 op++;
1056 blockType = 1;
1057 blockSize = contentSize;
1058 } else {
1059 /* compressed, most common */
1060 size_t compressedSize;
1061 blockType = 2;
1062
1063 frame->oldStats = frame->stats;
1064
1065 frame->data = op;
1066 compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
1067 if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */
1068 blockType = 0;
1069 memcpy(op, frame->src, contentSize);
1070
1071 op += contentSize;
1072 blockSize = contentSize; /* fall back on raw block if data doesn't
1073 compress */
1074
1075 frame->stats = frame->oldStats; /* don't update the stats */
1076 } else {
1077 op += compressedSize;
1078 blockSize = compressedSize;
1079 }
1080 }
1081 frame->src = (BYTE*)frame->src + contentSize;
1082
1083 DISPLAYLEVEL(4, " block type: %s\n", BLOCK_TYPES[blockType]);
1084 DISPLAYLEVEL(4, " block size field: %u\n", (U32)blockSize);
1085
1086 header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff);
1087 MEM_writeLE16(header + 1, (U16) (blockSize >> 5));
1088
1089 frame->data = op;
1090}
1091
1092static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
1093{
1094 size_t contentLeft = frame->header.contentSize;
1095 size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
1096 while (1) {
1097 /* 1 in 4 chance of ending frame */
1098 int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3);
1099 size_t blockContentSize;
1100 if (lastBlock) {
1101 blockContentSize = contentLeft;
1102 } else {
1103 if (contentLeft > 0 && (RAND(seed) & 7)) {
1104 /* some variable size block */
1105 blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1);
1106 } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) {
1107 /* some full size block */
1108 blockContentSize = maxBlockSize;
1109 } else {
1110 /* some empty block */
1111 blockContentSize = 0;
1112 }
1113 }
1114
1115 writeBlock(seed, frame, blockContentSize, lastBlock, info);
1116
1117 contentLeft -= blockContentSize;
1118 if (lastBlock) break;
1119 }
1120}
1121
1122static void writeChecksum(frame_t* frame)
1123{
1124 /* write checksum so implementations can verify their output */
1125 U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0);
1126 DISPLAYLEVEL(3, " checksum: %08x\n", (U32)digest);
1127 MEM_writeLE32(frame->data, (U32)digest);
1128 frame->data = (BYTE*)frame->data + 4;
1129}
1130
/* Writes `size` bytes from `buf` to the file at `path` (created/truncated in
 * binary mode), or to stdout when `path` is NULL.
 *
 * Any failure (open, write, flush or close) prints a diagnostic on stderr and
 * terminates the program with exit(1); the function only returns on success. */
static void outputBuffer(const void* buf, size_t size, const char* const path)
{
    const unsigned char* const bytes = (const unsigned char*)buf;
    /* never hand a NULL pointer to "%s" in the diagnostics below */
    const char* const label = path ? path : "(stdout)";
    FILE* const out = path ? fopen(path, "wb") : stdout;
    size_t written = 0;

    if (!out) {
        fprintf(stderr, "Failed to open file at %s: ", label);
        perror(NULL);
        exit(1);
    }

    while (written < size) {
        size_t const n = fwrite(bytes + written, 1, size - written, out);
        written += n;
        /* n == 0 without an error flag would otherwise loop forever */
        if (n == 0 || ferror(out)) {
            fprintf(stderr, "Failed to write to file at %s: ", label);
            perror(NULL);
            exit(1);
        }
    }

    /* buffered data only reaches the OS on flush/close, so a failure here is
     * still a failed write */
    if ((path ? fclose(out) : fflush(out)) != 0) {
        fprintf(stderr, "Failed to write to file at %s: ", label);
        perror(NULL);
        exit(1);
    }
}
1163
1164static void initFrame(frame_t* fr)
1165{
1166 memset(fr, 0, sizeof(*fr));
1167 fr->data = fr->dataStart = FRAME_BUFFER;
1168 fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER);
1169 fr->src = fr->srcStart = CONTENT_BUFFER;
1170 fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER);
1171
1172 /* init repeat codes */
1173 fr->stats.rep[0] = 1;
1174 fr->stats.rep[1] = 4;
1175 fr->stats.rep[2] = 8;
1176}
1177
1178/**
1179 * Generated a single zstd compressed block with no block/frame header.
1180 * Returns the final seed.
1181 */
1182static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info)
1183{
1184 size_t blockContentSize;
1185 int blockWritten = 0;
1186 BYTE* op;
1187 DISPLAYLEVEL(4, "block seed: %u\n", seed);
1188 initFrame(frame);
1189 op = (BYTE*)frame->data;
1190
1191 while (!blockWritten) {
1192 size_t cSize;
1193 /* generate window size */
1194 { int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10);
1195 int const mantissa = RAND(&seed) % 8;
1196 frame->header.windowSize = (1U << (exponent + 10));
1197 frame->header.windowSize += (frame->header.windowSize / 8) * mantissa;
1198 }
1199
1200 /* generate content size */
1201 { size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
1202 if (RAND(&seed) & 15) {
1203 /* some full size blocks */
1204 blockContentSize = maxBlockSize;
1205 } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) {
1206 /* some small blocks <= 128 bytes*/
1207 blockContentSize = RAND(&seed) % (1U << 7);
1208 } else {
1209 /* some variable size blocks */
1210 blockContentSize = RAND(&seed) % maxBlockSize;
1211 }
1212 }
1213
1214 /* try generating a compressed block */
1215 frame->oldStats = frame->stats;
1216 frame->data = op;
1217 cSize = writeCompressedBlock(&seed, frame, blockContentSize, info);
1218 if (cSize >= blockContentSize) { /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */
1219 /* data doesn't compress -- try again */
1220 frame->stats = frame->oldStats; /* don't update the stats */
1221 DISPLAYLEVEL(5, " can't compress block : try again \n");
1222 } else {
1223 blockWritten = 1;
1224 DISPLAYLEVEL(4, " block size: %u \n", (U32)cSize);
1225 frame->src = (BYTE*)frame->src + blockContentSize;
1226 }
1227 }
1228 return seed;
1229}
1230
1231/* Return the final seed */
1232static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
1233{
1234 /* generate a complete frame */
1235 DISPLAYLEVEL(3, "frame seed: %u\n", seed);
1236 initFrame(fr);
1237
1238 writeFrameHeader(&seed, fr, info);
1239 writeBlocks(&seed, fr, info);
1240 writeChecksum(fr);
1241
1242 return seed;
1243}
1244
1245/*_*******************************************************
1246* Dictionary Helper Functions
1247*********************************************************/
1248/* returns 0 if successful, otherwise returns 1 upon error */
1249static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict)
1250{
1251 /* allocate space for samples */
1252 int ret = 0;
1253 unsigned const numSamples = 4;
1254 size_t sampleSizes[4];
1255 BYTE* const samples = malloc(5000*sizeof(BYTE));
1256 if (samples == NULL) {
1257 DISPLAY("Error: could not allocate space for samples\n");
1258 return 1;
1259 }
1260
1261 /* generate samples */
1262 { unsigned literalValue = 1;
1263 unsigned samplesPos = 0;
1264 size_t currSize = 1;
1265 while (literalValue <= 4) {
1266 sampleSizes[literalValue - 1] = currSize;
1267 { size_t k;
1268 for (k = 0; k < currSize; k++) {
1269 *(samples + (samplesPos++)) = (BYTE)literalValue;
1270 } }
1271 literalValue++;
1272 currSize *= 16;
1273 } }
1274
1275 { size_t dictWriteSize = 0;
1276 ZDICT_params_t zdictParams;
1277 size_t const headerSize = MAX(dictSize/4, 256);
1278 size_t const dictContentSize = dictSize - headerSize;
1279 BYTE* const dictContent = fullDict + headerSize;
1280 if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
1281 DISPLAY("Error: dictionary size is too small\n");
1282 ret = 1;
1283 goto exitGenRandomDict;
1284 }
1285
1286 /* init dictionary params */
1287 memset(&zdictParams, 0, sizeof(zdictParams));
1288 zdictParams.dictID = dictID;
1289 zdictParams.notificationLevel = 1;
1290
1291 /* fill in dictionary content */
1292 RAND_buffer(&seed, (void*)dictContent, dictContentSize);
1293
1294 /* finalize dictionary with random samples */
1295 dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
1296 dictContent, dictContentSize,
1297 samples, sampleSizes, numSamples,
1298 zdictParams);
1299
1300 if (ZDICT_isError(dictWriteSize)) {
1301 DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
1302 ret = 1;
1303 }
1304 }
1305
1306exitGenRandomDict:
1307 free(samples);
1308 return ret;
1309}
1310
1311static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){
1312 /* allocate space statically */
1313 dictInfo dictOp;
1314 memset(&dictOp, 0, sizeof(dictOp));
1315 dictOp.useDict = useDict;
1316 dictOp.dictContentSize = dictContentSize;
1317 dictOp.dictContent = dictContent;
1318 dictOp.dictID = dictID;
1319 return dictOp;
1320}
1321
1322/*-*******************************************************
1323* Test Mode
1324*********************************************************/
1325
/* Destination buffer used by the decode tests in this file: each test
 * decompresses into it and compares the result with the generated source. */
BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
1327
1328static size_t testDecodeSimple(frame_t* fr)
1329{
1330 /* test decoding the generated data with the simple API */
1331 size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1332 fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
1333
1334 if (ZSTD_isError(ret)) return ret;
1335
1336 if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
1337 (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
1338 return ERROR(corruption_detected);
1339 }
1340
1341 return ret;
1342}
1343
1344static size_t testDecodeStreaming(frame_t* fr)
1345{
1346 /* test decoding the generated data with the streaming API */
1347 ZSTD_DStream* zd = ZSTD_createDStream();
1348 ZSTD_inBuffer in;
1349 ZSTD_outBuffer out;
1350 size_t ret;
1351
1352 if (!zd) return ERROR(memory_allocation);
1353
1354 in.src = fr->dataStart;
1355 in.pos = 0;
1356 in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart;
1357
1358 out.dst = DECOMPRESSED_BUFFER;
1359 out.pos = 0;
1360 out.size = ZSTD_DStreamOutSize();
1361
1362 ZSTD_initDStream(zd);
1363 while (1) {
1364 ret = ZSTD_decompressStream(zd, &out, &in);
1365 if (ZSTD_isError(ret)) goto cleanup; /* error */
1366 if (ret == 0) break; /* frame is done */
1367
1368 /* force decoding to be done in chunks */
1369 out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size);
1370 }
1371
1372 ret = out.pos;
1373
1374 if (memcmp(out.dst, fr->srcStart, out.pos) != 0) {
1375 return ERROR(corruption_detected);
1376 }
1377
1378cleanup:
1379 ZSTD_freeDStream(zd);
1380 return ret;
1381}
1382
1383static size_t testDecodeWithDict(U32 seed, genType_e genType)
1384{
1385 /* create variables */
1386 size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN;
1387 U32 const dictID = RAND(&seed);
1388 size_t errorDetected = 0;
1389 BYTE* const fullDict = malloc(dictSize);
1390 if (fullDict == NULL) {
1391 return ERROR(GENERIC);
1392 }
1393
1394 /* generate random dictionary */
1395 if (genRandomDict(dictID, seed, dictSize, fullDict)) { /* return 0 on success */
1396 errorDetected = ERROR(GENERIC);
1397 goto dictTestCleanup;
1398 }
1399
1400
1401 { frame_t fr;
1402 dictInfo info;
1403 ZSTD_DCtx* const dctx = ZSTD_createDCtx();
1404 size_t ret;
1405
1406 /* get dict info */
1407 { size_t const headerSize = MAX(dictSize/4, 256);
1408 size_t const dictContentSize = dictSize-headerSize;
1409 BYTE* const dictContent = fullDict+headerSize;
1410 info = initDictInfo(1, dictContentSize, dictContent, dictID);
1411 }
1412
1413 /* manually decompress and check difference */
1414 if (genType == gt_frame) {
1415 /* Test frame */
1416 generateFrame(seed, &fr, info);
1417 ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1418 fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart,
1419 fullDict, dictSize);
1420 } else {
1421 /* Test block */
1422 generateCompressedBlock(seed, &fr, info);
1423 ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize);
1424 if (ZSTD_isError(ret)) {
1425 errorDetected = ret;
1426 ZSTD_freeDCtx(dctx);
1427 goto dictTestCleanup;
1428 }
1429 ret = ZSTD_decompressBlock(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1430 fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart);
1431 }
1432 ZSTD_freeDCtx(dctx);
1433
1434 if (ZSTD_isError(ret)) {
1435 errorDetected = ret;
1436 goto dictTestCleanup;
1437 }
1438
1439 if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) {
1440 errorDetected = ERROR(corruption_detected);
1441 goto dictTestCleanup;
1442 }
1443 }
1444
1445dictTestCleanup:
1446 free(fullDict);
1447 return errorDetected;
1448}
1449
1450static size_t testDecodeRawBlock(frame_t* fr)
1451{
1452 ZSTD_DCtx* dctx = ZSTD_createDCtx();
1453 size_t ret = ZSTD_decompressBegin(dctx);
1454 if (ZSTD_isError(ret)) return ret;
1455
1456 ret = ZSTD_decompressBlock(
1457 dctx,
1458 DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1459 fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
1460 ZSTD_freeDCtx(dctx);
1461 if (ZSTD_isError(ret)) return ret;
1462
1463 if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
1464 (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
1465 return ERROR(corruption_detected);
1466 }
1467
1468 return ret;
1469}
1470
1471static int runBlockTest(U32* seed)
1472{
1473 frame_t fr;
1474 U32 const seedCopy = *seed;
1475 { dictInfo const info = initDictInfo(0, 0, NULL, 0);
1476 *seed = generateCompressedBlock(*seed, &fr, info);
1477 }
1478
1479 { size_t const r = testDecodeRawBlock(&fr);
1480 if (ZSTD_isError(r)) {
1481 DISPLAY("Error in block mode on test seed %u: %s\n", seedCopy,
1482 ZSTD_getErrorName(r));
1483 return 1;
1484 }
1485 }
1486
1487 { size_t const r = testDecodeWithDict(*seed, gt_block);
1488 if (ZSTD_isError(r)) {
1489 DISPLAY("Error in block mode with dictionary on test seed %u: %s\n",
1490 seedCopy, ZSTD_getErrorName(r));
1491 return 1;
1492 }
1493 }
1494 return 0;
1495}
1496
1497static int runFrameTest(U32* seed)
1498{
1499 frame_t fr;
1500 U32 const seedCopy = *seed;
1501 { dictInfo const info = initDictInfo(0, 0, NULL, 0);
1502 *seed = generateFrame(*seed, &fr, info);
1503 }
1504
1505 { size_t const r = testDecodeSimple(&fr);
1506 if (ZSTD_isError(r)) {
1507 DISPLAY("Error in simple mode on test seed %u: %s\n",
1508 seedCopy, ZSTD_getErrorName(r));
1509 return 1;
1510 }
1511 }
1512 { size_t const r = testDecodeStreaming(&fr);
1513 if (ZSTD_isError(r)) {
1514 DISPLAY("Error in streaming mode on test seed %u: %s\n",
1515 seedCopy, ZSTD_getErrorName(r));
1516 return 1;
1517 }
1518 }
1519 { size_t const r = testDecodeWithDict(*seed, gt_frame); /* avoid big dictionaries */
1520 if (ZSTD_isError(r)) {
1521 DISPLAY("Error in dictionary mode on test seed %u: %s\n",
1522 seedCopy, ZSTD_getErrorName(r));
1523 return 1;
1524 }
1525 }
1526 return 0;
1527}
1528
1529static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS,
1530 genType_e genType)
1531{
1532 unsigned fnum;
1533
1534 clock_t const startClock = clock();
1535 clock_t const maxClockSpan = testDurationS * CLOCKS_PER_SEC;
1536
1537 if (numFiles == 0 && !testDurationS) numFiles = 1;
1538
1539 DISPLAY("seed: %u\n", seed);
1540
1541 for (fnum = 0; fnum < numFiles || clockSpan(startClock) < maxClockSpan; fnum++) {
1542 if (fnum < numFiles)
1543 DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
1544 else
1545 DISPLAYUPDATE("\r%u ", fnum);
1546
1547 { int const ret = (genType == gt_frame) ?
1548 runFrameTest(&seed) :
1549 runBlockTest(&seed);
1550 if (ret) return ret;
1551 }
1552 }
1553
1554 DISPLAY("\r%u tests completed: ", fnum);
1555 DISPLAY("OK\n");
1556
1557 return 0;
1558}
1559
1560/*-*******************************************************
1561* File I/O
1562*********************************************************/
1563
1564static int generateFile(U32 seed, const char* const path,
1565 const char* const origPath, genType_e genType)
1566{
1567 frame_t fr;
1568
1569 DISPLAY("seed: %u\n", seed);
1570
1571 { dictInfo const info = initDictInfo(0, 0, NULL, 0);
1572 if (genType == gt_frame) {
1573 generateFrame(seed, &fr, info);
1574 } else {
1575 generateCompressedBlock(seed, &fr, info);
1576 }
1577 }
1578 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
1579 if (origPath) {
1580 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
1581 }
1582 return 0;
1583}
1584
1585static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
1586 const char* const origPath, genType_e genType)
1587{
1588 char outPath[MAX_PATH];
1589 unsigned fnum;
1590
1591 DISPLAY("seed: %u\n", seed);
1592
1593 for (fnum = 0; fnum < numFiles; fnum++) {
1594 frame_t fr;
1595
1596 DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
1597
1598 { dictInfo const info = initDictInfo(0, 0, NULL, 0);
1599 if (genType == gt_frame) {
1600 seed = generateFrame(seed, &fr, info);
1601 } else {
1602 seed = generateCompressedBlock(seed, &fr, info);
1603 }
1604 }
1605
1606 if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
1607 DISPLAY("Error: path too long\n");
1608 return 1;
1609 }
1610 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
1611
1612 if (origPath) {
1613 if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
1614 DISPLAY("Error: path too long\n");
1615 return 1;
1616 }
1617 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
1618 }
1619 }
1620
1621 DISPLAY("\r%u/%u \n", fnum, numFiles);
1622
1623 return 0;
1624}
1625
1626static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
1627 const char* const origPath, const size_t dictSize,
1628 genType_e genType)
1629{
1630 char outPath[MAX_PATH];
1631 BYTE* fullDict;
1632 U32 const dictID = RAND(&seed);
1633 int errorDetected = 0;
1634
1635 if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
1636 DISPLAY("Error: path too long\n");
1637 return 1;
1638 }
1639
1640 /* allocate space for the dictionary */
1641 fullDict = malloc(dictSize);
1642 if (fullDict == NULL) {
1643 DISPLAY("Error: could not allocate space for full dictionary.\n");
1644 return 1;
1645 }
1646
1647 /* randomly generate the dictionary */
1648 { int const ret = genRandomDict(dictID, seed, dictSize, fullDict);
1649 if (ret != 0) {
1650 errorDetected = ret;
1651 goto dictCleanup;
1652 }
1653 }
1654
1655 /* write out dictionary */
1656 if (numFiles != 0) {
1657 if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
1658 DISPLAY("Error: dictionary path too long\n");
1659 errorDetected = 1;
1660 goto dictCleanup;
1661 }
1662 outputBuffer(fullDict, dictSize, outPath);
1663 }
1664 else {
1665 outputBuffer(fullDict, dictSize, "dictionary");
1666 }
1667
1668 /* generate random compressed/decompressed files */
1669 { unsigned fnum;
1670 for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) {
1671 frame_t fr;
1672 DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
1673 {
1674 size_t const headerSize = MAX(dictSize/4, 256);
1675 size_t const dictContentSize = dictSize-headerSize;
1676 BYTE* const dictContent = fullDict+headerSize;
1677 dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
1678 if (genType == gt_frame) {
1679 seed = generateFrame(seed, &fr, info);
1680 } else {
1681 seed = generateCompressedBlock(seed, &fr, info);
1682 }
1683 }
1684
1685 if (numFiles != 0) {
1686 if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
1687 DISPLAY("Error: path too long\n");
1688 errorDetected = 1;
1689 goto dictCleanup;
1690 }
1691 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
1692
1693 if (origPath) {
1694 if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
1695 DISPLAY("Error: path too long\n");
1696 errorDetected = 1;
1697 goto dictCleanup;
1698 }
1699 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
1700 }
1701 }
1702 else {
1703 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
1704 if (origPath) {
1705 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
1706 }
1707 }
1708 }
1709 }
1710
1711dictCleanup:
1712 free(fullDict);
1713 return errorDetected;
1714}
1715
1716
1717/*_*******************************************************
1718* Command line
1719*********************************************************/
1720static U32 makeSeed(void)
1721{
1722 U32 t = (U32) time(NULL);
1723 return XXH32(&t, sizeof(t), 0) % 65536;
1724}
1725
/* Parses the run of decimal digits at *argument and returns its value
 * (0 when there is no digit). *argument is advanced past the digits read.
 * The value wraps around on unsigned overflow. */
static unsigned readInt(const char** argument)
{
    const char* cursor = *argument;
    unsigned value = 0;

    for ( ; *cursor >= '0' && *cursor <= '9'; cursor++) {
        value = value * 10 + (unsigned)(*cursor - '0');
    }

    *argument = cursor;
    return value;
}
1736
/* Prints the basic command-line help on stderr. */
static void usage(const char* programName)
{
    /* synopsis */
    DISPLAY( "Usage :\n"
             " %s [args]\n"
             "\n"
             "Arguments :\n",
             programName);

    /* one entry per option; adjacent literals form a single message */
    DISPLAY( " -p<path> : select output path (default:stdout)\n"
             " in multiple files mode this should be a directory\n"
             " -o<path> : select path to output original file (default:no output)\n"
             " in multiple files mode this should be a directory\n"
             " -s# : select seed (default:random based on time)\n"
             " -n# : number of files to generate (default:1)\n"
             " -t : activate test mode (test files against libzstd instead of outputting them)\n"
             " -T# : length of time to run tests for\n"
             " -v : increase verbosity level (default:0, max:7)\n"
             " -h/H : display help/long help and exit\n");
}
1754
/* Prints the long help on stderr: the basic help from usage() followed by
 * the advanced (long-form) options. */
static void advancedUsage(const char* programName)
{
    usage(programName);

    DISPLAY( "\n"
             "Advanced arguments :\n"
             " --content-size : always include the content size in the frame header\n"
             " --use-dict=# : include a dictionary used to decompress the corpus\n"
             " --gen-blocks : generate raw compressed blocks without block/frame headers\n"
             " --max-block-size-log=# : max block size log, must be in range [2, 17]\n"
             " --max-content-size-log=# : max content size log, must be <= 20\n"
             " (this is ignored with gen-blocks)\n");
}
1767
/*! readU32FromChar() :
    @return : unsigned integer value read from input in `char` format.
    A leading run of decimal digits is parsed, then an optional size suffix:
    'K' multiplies by 1024 and 'M' by 1024*1024; either may be followed by an
    optional 'i' and an optional 'B' (K, KB, KiB, M, MB, MiB, ...).
    `*stringPtr` is advanced to the position where reading stopped.
    Note : function result can overflow if digit string > MAX_UINT */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char* p = *stringPtr;
    unsigned result = 0;

    while (*p >= '0' && *p <= '9') {
        result = result * 10 + (unsigned)(*p - '0');
        p++;
    }

    if (*p == 'K' || *p == 'M') {
        result <<= (*p == 'M') ? 20 : 10;
        p++;
        if (*p == 'i') p++;
        if (*p == 'B') p++;
    }

    *stringPtr = p;
    return result;
}
1787
/** longCommandWArg() :
 * checks whether *stringPtr starts with longCommand.
 * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
 * @return 0 and doesn't modify *stringPtr otherwise.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }

    *stringPtr += prefixLen;
    return 1;
}
1800
1801int main(int argc, char** argv)
1802{
1803 U32 seed = 0;
1804 int seedset = 0;
1805 unsigned numFiles = 0;
1806 unsigned testDuration = 0;
1807 int testMode = 0;
1808 const char* path = NULL;
1809 const char* origPath = NULL;
1810 int useDict = 0;
1811 unsigned dictSize = (10 << 10); /* 10 kB default */
1812 genType_e genType = gt_frame;
1813
1814 int argNb;
1815
1816 /* Check command line */
1817 for (argNb=1; argNb<argc; argNb++) {
1818 const char* argument = argv[argNb];
1819 if(!argument) continue; /* Protection if argument empty */
1820
1821 /* Handle commands. Aggregated commands are allowed */
1822 if (argument[0]=='-') {
1823 argument++;
1824 while (*argument!=0) {
1825 switch(*argument)
1826 {
1827 case 'h':
1828 usage(argv[0]);
1829 return 0;
1830 case 'H':
1831 advancedUsage(argv[0]);
1832 return 0;
1833 case 'v':
1834 argument++;
1835 g_displayLevel++;
1836 break;
1837 case 's':
1838 argument++;
1839 seedset=1;
1840 seed = readInt(&argument);
1841 break;
1842 case 'n':
1843 argument++;
1844 numFiles = readInt(&argument);
1845 break;
1846 case 'T':
1847 argument++;
1848 testDuration = readInt(&argument);
1849 if (*argument == 'm') {
1850 testDuration *= 60;
1851 argument++;
1852 if (*argument == 'n') argument++;
1853 }
1854 break;
1855 case 'o':
1856 argument++;
1857 origPath = argument;
1858 argument += strlen(argument);
1859 break;
1860 case 'p':
1861 argument++;
1862 path = argument;
1863 argument += strlen(argument);
1864 break;
1865 case 't':
1866 argument++;
1867 testMode = 1;
1868 break;
1869 case '-':
1870 argument++;
1871 if (strcmp(argument, "content-size") == 0) {
1872 opts.contentSize = 1;
1873 } else if (longCommandWArg(&argument, "use-dict=")) {
1874 dictSize = readU32FromChar(&argument);
1875 useDict = 1;
1876 } else if (strcmp(argument, "gen-blocks") == 0) {
1877 genType = gt_block;
1878 } else if (longCommandWArg(&argument, "max-block-size-log=")) {
1879 U32 value = readU32FromChar(&argument);
1880 if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) {
1881 g_maxBlockSize = 1U << value;
1882 }
1883 } else if (longCommandWArg(&argument, "max-content-size-log=")) {
1884 U32 value = readU32FromChar(&argument);
1885 g_maxDecompressedSizeLog =
1886 MIN(MAX_DECOMPRESSED_SIZE_LOG, value);
1887 } else {
1888 advancedUsage(argv[0]);
1889 return 1;
1890 }
1891 argument += strlen(argument);
1892 break;
1893 default:
1894 usage(argv[0]);
1895 return 1;
1896 } } } } /* for (argNb=1; argNb<argc; argNb++) */
1897
1898 if (!seedset) {
1899 seed = makeSeed();
1900 }
1901
1902 if (testMode) {
1903 return runTestMode(seed, numFiles, testDuration, genType);
1904 } else {
1905 if (testDuration) {
1906 DISPLAY("Error: -T requires test mode (-t)\n\n");
1907 usage(argv[0]);
1908 return 1;
1909 }
1910 }
1911
1912 if (!path) {
1913 DISPLAY("Error: path is required in file generation mode\n");
1914 usage(argv[0]);
1915 return 1;
1916 }
1917
1918 if (numFiles == 0 && useDict == 0) {
1919 return generateFile(seed, path, origPath, genType);
1920 } else if (useDict == 0){
1921 return generateCorpus(seed, numFiles, path, origPath, genType);
1922 } else {
1923 /* should generate files with a dictionary */
1924 return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType);
1925 }
1926
1927}