/**
 * Origin of this listing: ceph.git, ceph/src/zstd/programs/datagen.c
 * (blob view served by git.proxmox.com).
 *
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 */
12 /* *************************************
14 ***************************************/
16 # define _CRT_SECURE_NO_WARNINGS /* removes Visual warning on strerror() */
17 # define _CRT_SECURE_NO_DEPRECATE /* removes VS2005 warning on strerror() */
20 /*-************************************
22 **************************************/
23 #include <stdlib.h> /* malloc, free */
24 #include <stdio.h> /* FILE, fwrite, fprintf */
25 #include <string.h> /* memcpy */
26 #include "mem.h" /* U32 */
29 /*-************************************
30 * OS-specific Includes
31 **************************************/
32 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
33 # include <fcntl.h> /* _O_BINARY */
34 # include <io.h> /* _setmode, _isatty */
35 # define SET_BINARY_MODE(file) {int unused = _setmode(_fileno(file), _O_BINARY); (void)unused; }
37 # define SET_BINARY_MODE(file)
41 /*-************************************
43 **************************************/
45 #define MIN(a,b) ( (a) < (b) ? (a) : (b) )
48 #define TRACE(...) if (RDG_DEBUG) fprintf(stderr, __VA_ARGS__ )
51 /*-************************************
53 **************************************/
55 #define LTSIZE (1<<LTLOG)
56 #define LTMASK (LTSIZE-1)
59 /*-*******************************************************
61 *********************************************************/
62 #define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
63 static U32
RDG_rand(U32
* src
)
65 static const U32 prime1
= 2654435761U;
66 static const U32 prime2
= 2246822519U;
70 rand32
= RDG_rotl32(rand32
, 13);
76 static void RDG_fillLiteralDistrib(BYTE
* ldt
, double ld
)
78 BYTE
const firstChar
= (ld
<=0.0) ? 0 : '(';
79 BYTE
const lastChar
= (ld
<=0.0) ? 255 : '}';
80 BYTE character
= (ld
<=0.0) ? 0 : '0';
83 if (ld
<=0.0) ld
= 0.0;
84 for (u
=0; u
<LTSIZE
; ) {
85 U32
const weight
= (U32
)((double)(LTSIZE
- u
) * ld
) + 1;
86 U32
const end
= MIN ( u
+ weight
, LTSIZE
);
87 while (u
< end
) ldt
[u
++] = character
;
89 if (character
> lastChar
) character
= firstChar
;
94 static BYTE
RDG_genChar(U32
* seed
, const BYTE
* ldt
)
96 U32
const id
= RDG_rand(seed
) & LTMASK
;
97 return ldt
[id
]; /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. Checked : table is fully initialized */
101 static U32
RDG_rand15Bits (unsigned* seedPtr
)
103 return RDG_rand(seedPtr
) & 0x7FFF;
106 static U32
RDG_randLength(unsigned* seedPtr
)
108 if (RDG_rand(seedPtr
) & 7) return (RDG_rand(seedPtr
) & 0xF); /* small length */
109 return (RDG_rand(seedPtr
) & 0x1FF) + 0xF;
112 void RDG_genBlock(void* buffer
, size_t buffSize
, size_t prefixSize
, double matchProba
, const BYTE
* ldt
, unsigned* seedPtr
)
114 BYTE
* const buffPtr
= (BYTE
*)buffer
;
115 U32
const matchProba32
= (U32
)(32768 * matchProba
);
116 size_t pos
= prefixSize
;
119 /* special case : sparse content */
120 while (matchProba
>= 1.0) {
121 size_t size0
= RDG_rand(seedPtr
) & 3;
122 size0
= (size_t)1 << (16 + size0
* 2);
123 size0
+= RDG_rand(seedPtr
) & (size0
-1); /* because size0 is power of 2*/
124 if (buffSize
< pos
+ size0
) {
125 memset(buffPtr
+pos
, 0, buffSize
-pos
);
128 memset(buffPtr
+pos
, 0, size0
);
130 buffPtr
[pos
-1] = RDG_genChar(seedPtr
, ldt
);
135 if (pos
==0) buffPtr
[0] = RDG_genChar(seedPtr
, ldt
), pos
=1;
137 /* Generate compressible data */
138 while (pos
< buffSize
) {
139 /* Select : Literal (char) or Match (within 32K) */
140 if (RDG_rand15Bits(seedPtr
) < matchProba32
) {
141 /* Copy (within 32K) */
142 U32
const length
= RDG_randLength(seedPtr
) + 4;
143 U32
const d
= (U32
) MIN(pos
+ length
, buffSize
);
144 U32
const repeatOffset
= (RDG_rand(seedPtr
) & 15) == 2;
145 U32
const randOffset
= RDG_rand15Bits(seedPtr
) + 1;
146 U32
const offset
= repeatOffset
? prevOffset
: (U32
) MIN(randOffset
, pos
);
147 size_t match
= pos
- offset
;
148 while (pos
< d
) buffPtr
[pos
++] = buffPtr
[match
++]; /* correctly manages overlaps */
151 /* Literal (noise) */
152 U32
const length
= RDG_randLength(seedPtr
);
153 U32
const d
= (U32
) MIN(pos
+ length
, buffSize
);
154 while (pos
< d
) buffPtr
[pos
++] = RDG_genChar(seedPtr
, ldt
);
159 void RDG_genBuffer(void* buffer
, size_t size
, double matchProba
, double litProba
, unsigned seed
)
162 memset(ldt
, '0', sizeof(ldt
)); /* yes, character '0', this is intentional */
163 if (litProba
<=0.0) litProba
= matchProba
/ 4.5;
164 RDG_fillLiteralDistrib(ldt
, litProba
);
165 RDG_genBlock(buffer
, size
, 0, matchProba
, ldt
, &seed
);
169 void RDG_genStdout(unsigned long long size
, double matchProba
, double litProba
, unsigned seed
)
171 size_t const stdBlockSize
= 128 KB
;
172 size_t const stdDictSize
= 32 KB
;
173 BYTE
* const buff
= (BYTE
*)malloc(stdDictSize
+ stdBlockSize
);
175 BYTE ldt
[LTSIZE
]; /* literals distribution table */
178 if (buff
==NULL
) { perror("datagen"); exit(1); }
179 if (litProba
<=0.0) litProba
= matchProba
/ 4.5;
180 memset(ldt
, '0', sizeof(ldt
)); /* yes, character '0', this is intentional */
181 RDG_fillLiteralDistrib(ldt
, litProba
);
182 SET_BINARY_MODE(stdout
);
184 /* Generate initial dict */
185 RDG_genBlock(buff
, stdDictSize
, 0, matchProba
, ldt
, &seed
);
187 /* Generate compressible data */
188 while (total
< size
) {
189 size_t const genBlockSize
= (size_t) (MIN (stdBlockSize
, size
-total
));
190 RDG_genBlock(buff
, stdDictSize
+stdBlockSize
, stdDictSize
, matchProba
, ldt
, &seed
);
191 total
+= genBlockSize
;
192 { size_t const unused
= fwrite(buff
, 1, genBlockSize
, stdout
); (void)unused
; }
194 memcpy(buff
, buff
+ stdBlockSize
, stdDictSize
);