]>
Commit | Line | Data |
---|---|---|
11fdf7f2 | 1 | /* |
7c673cae FG |
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. |
3 | * All rights reserved. | |
4 | * | |
11fdf7f2 TL |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
7 | * in the COPYING file in the root directory of this source tree). | |
8 | * You may select, at your option, one of the above-listed licenses. | |
7c673cae FG |
9 | */ |
10 | ||
11 | ||
12 | ||
7c673cae FG |
13 | /*-************************************ |
14 | * Dependencies | |
15 | **************************************/ | |
11fdf7f2 | 16 | #include "platform.h" /* SET_BINARY_MODE */ |
7c673cae FG |
17 | #include <stdlib.h> /* malloc, free */ |
18 | #include <stdio.h> /* FILE, fwrite, fprintf */ | |
19 | #include <string.h> /* memcpy */ | |
20 | #include "mem.h" /* U32 */ | |
21 | ||
22 | ||
7c673cae FG |
23 | /*-************************************ |
24 | * Macros | |
25 | **************************************/ | |
/* KB : size suffix, so that `128 KB` expands to `128 *(1 <<10)` */
#define KB *(1 <<10)
/* MIN() : smaller of two values; the selected argument is evaluated twice */
#define MIN(a,b)  ( (a) < (b) ? (a) : (b) )

/* RDG_DEBUG : set to non-zero to make TRACE() print to stderr */
#define RDG_DEBUG 0
/* TRACE() : printf-style debug output on stderr, active only when RDG_DEBUG is non-zero.
 * Wrapped in do { } while (0) so that it behaves as a single statement :
 * a bare `if` here would capture the `else` of an enclosing `if (...) TRACE(...); else ...`. */
#define TRACE(...)   do { if (RDG_DEBUG) fprintf(stderr, __VA_ARGS__ ); } while (0)
31 | ||
32 | ||
33 | /*-************************************ | |
34 | * Local constants | |
35 | **************************************/ | |
/* Literal distribution table geometry :
 * the table holds 2^LTLOG entries, and LTMASK reduces any value to a valid index. */
#define LTLOG   13
#define LTSIZE  (1 << LTLOG)
#define LTMASK  (LTSIZE - 1)
39 | ||
40 | ||
41 | /*-******************************************************* | |
42 | * Local Functions | |
43 | *********************************************************/ | |
44 | #define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) | |
45 | static U32 RDG_rand(U32* src) | |
46 | { | |
47 | static const U32 prime1 = 2654435761U; | |
48 | static const U32 prime2 = 2246822519U; | |
49 | U32 rand32 = *src; | |
50 | rand32 *= prime1; | |
51 | rand32 ^= prime2; | |
52 | rand32 = RDG_rotl32(rand32, 13); | |
53 | *src = rand32; | |
54 | return rand32 >> 5; | |
55 | } | |
56 | ||
57 | ||
58 | static void RDG_fillLiteralDistrib(BYTE* ldt, double ld) | |
59 | { | |
60 | BYTE const firstChar = (ld<=0.0) ? 0 : '('; | |
61 | BYTE const lastChar = (ld<=0.0) ? 255 : '}'; | |
62 | BYTE character = (ld<=0.0) ? 0 : '0'; | |
63 | U32 u; | |
64 | ||
65 | if (ld<=0.0) ld = 0.0; | |
66 | for (u=0; u<LTSIZE; ) { | |
67 | U32 const weight = (U32)((double)(LTSIZE - u) * ld) + 1; | |
68 | U32 const end = MIN ( u + weight , LTSIZE); | |
69 | while (u < end) ldt[u++] = character; | |
70 | character++; | |
71 | if (character > lastChar) character = firstChar; | |
72 | } | |
73 | } | |
74 | ||
75 | ||
76 | static BYTE RDG_genChar(U32* seed, const BYTE* ldt) | |
77 | { | |
78 | U32 const id = RDG_rand(seed) & LTMASK; | |
79 | return ldt[id]; /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. Checked : table is fully initialized */ | |
80 | } | |
81 | ||
82 | ||
83 | static U32 RDG_rand15Bits (unsigned* seedPtr) | |
84 | { | |
85 | return RDG_rand(seedPtr) & 0x7FFF; | |
86 | } | |
87 | ||
88 | static U32 RDG_randLength(unsigned* seedPtr) | |
89 | { | |
90 | if (RDG_rand(seedPtr) & 7) return (RDG_rand(seedPtr) & 0xF); /* small length */ | |
91 | return (RDG_rand(seedPtr) & 0x1FF) + 0xF; | |
92 | } | |
93 | ||
94 | void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, const BYTE* ldt, unsigned* seedPtr) | |
95 | { | |
96 | BYTE* const buffPtr = (BYTE*)buffer; | |
97 | U32 const matchProba32 = (U32)(32768 * matchProba); | |
98 | size_t pos = prefixSize; | |
99 | U32 prevOffset = 1; | |
100 | ||
101 | /* special case : sparse content */ | |
102 | while (matchProba >= 1.0) { | |
103 | size_t size0 = RDG_rand(seedPtr) & 3; | |
104 | size0 = (size_t)1 << (16 + size0 * 2); | |
105 | size0 += RDG_rand(seedPtr) & (size0-1); /* because size0 is power of 2*/ | |
106 | if (buffSize < pos + size0) { | |
107 | memset(buffPtr+pos, 0, buffSize-pos); | |
108 | return; | |
109 | } | |
110 | memset(buffPtr+pos, 0, size0); | |
111 | pos += size0; | |
112 | buffPtr[pos-1] = RDG_genChar(seedPtr, ldt); | |
113 | continue; | |
114 | } | |
115 | ||
116 | /* init */ | |
117 | if (pos==0) buffPtr[0] = RDG_genChar(seedPtr, ldt), pos=1; | |
118 | ||
119 | /* Generate compressible data */ | |
120 | while (pos < buffSize) { | |
121 | /* Select : Literal (char) or Match (within 32K) */ | |
122 | if (RDG_rand15Bits(seedPtr) < matchProba32) { | |
123 | /* Copy (within 32K) */ | |
124 | U32 const length = RDG_randLength(seedPtr) + 4; | |
125 | U32 const d = (U32) MIN(pos + length , buffSize); | |
126 | U32 const repeatOffset = (RDG_rand(seedPtr) & 15) == 2; | |
127 | U32 const randOffset = RDG_rand15Bits(seedPtr) + 1; | |
128 | U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos); | |
129 | size_t match = pos - offset; | |
130 | while (pos < d) buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ | |
131 | prevOffset = offset; | |
132 | } else { | |
133 | /* Literal (noise) */ | |
134 | U32 const length = RDG_randLength(seedPtr); | |
135 | U32 const d = (U32) MIN(pos + length, buffSize); | |
136 | while (pos < d) buffPtr[pos++] = RDG_genChar(seedPtr, ldt); | |
137 | } } | |
138 | } | |
139 | ||
140 | ||
141 | void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) | |
142 | { | |
143 | BYTE ldt[LTSIZE]; | |
144 | memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */ | |
145 | if (litProba<=0.0) litProba = matchProba / 4.5; | |
146 | RDG_fillLiteralDistrib(ldt, litProba); | |
147 | RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed); | |
148 | } | |
149 | ||
150 | ||
151 | void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed) | |
152 | { | |
153 | size_t const stdBlockSize = 128 KB; | |
154 | size_t const stdDictSize = 32 KB; | |
155 | BYTE* const buff = (BYTE*)malloc(stdDictSize + stdBlockSize); | |
156 | U64 total = 0; | |
157 | BYTE ldt[LTSIZE]; /* literals distribution table */ | |
158 | ||
159 | /* init */ | |
160 | if (buff==NULL) { perror("datagen"); exit(1); } | |
161 | if (litProba<=0.0) litProba = matchProba / 4.5; | |
162 | memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */ | |
163 | RDG_fillLiteralDistrib(ldt, litProba); | |
164 | SET_BINARY_MODE(stdout); | |
165 | ||
166 | /* Generate initial dict */ | |
167 | RDG_genBlock(buff, stdDictSize, 0, matchProba, ldt, &seed); | |
168 | ||
169 | /* Generate compressible data */ | |
170 | while (total < size) { | |
171 | size_t const genBlockSize = (size_t) (MIN (stdBlockSize, size-total)); | |
172 | RDG_genBlock(buff, stdDictSize+stdBlockSize, stdDictSize, matchProba, ldt, &seed); | |
173 | total += genBlockSize; | |
174 | { size_t const unused = fwrite(buff, 1, genBlockSize, stdout); (void)unused; } | |
175 | /* update dict */ | |
176 | memcpy(buff, buff + stdBlockSize, stdDictSize); | |
177 | } | |
178 | ||
179 | /* cleanup */ | |
180 | free(buff); | |
181 | } |