]>
git.proxmox.com Git - ceph.git/blob - ceph/src/zstd/programs/datagen.c
/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
/*-************************************
*  Dependencies
**************************************/
17 #include "platform.h" /* SET_BINARY_MODE */
18 #include <stdlib.h> /* malloc, free */
19 #include <stdio.h> /* FILE, fwrite, fprintf */
20 #include <string.h> /* memcpy */
21 #include "../lib/common/mem.h" /* U32 */
/*-************************************
*  Macros
**************************************/
/* Size suffix used as `128 KB` : multiplies the preceding literal by 1024.
 * Guarded so an earlier definition, if any, takes precedence. */
#ifndef KB
#  define KB *(1 <<10)
#endif

/* Smaller of two values. Each argument may be evaluated twice :
 * do not pass expressions with side effects. */
#define MIN(a,b)  ( (a) < (b) ? (a) : (b) )

/* Debug traces are compiled in but disabled unless RDG_DEBUG is set non-zero. */
#ifndef RDG_DEBUG
#  define RDG_DEBUG 0
#endif

/* printf-style trace to stderr.
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * notably when used as the body of an if / else. */
#define TRACE(...)   do { if (RDG_DEBUG) fprintf(stderr, __VA_ARGS__ ); } while (0)
/*-************************************
*  Local constants
**************************************/
/* Literal distribution table geometry.
 * NOTE(review): the definition of LTLOG is not visible next to these macros;
 * 13 (an 8192-entry table) is used as a fallback only when it is not already
 * defined -- confirm against the intended value. */
#ifndef LTLOG
#  define LTLOG 13
#endif
#define LTSIZE (1<<LTLOG)   /* number of entries in the table (power of 2) */
#define LTMASK (LTSIZE-1)   /* reduces a random value to a valid table index */
/*-*******************************************************
*  Local Functions
*********************************************************/
/* Rotate the 32-bit unsigned value x left by r bits.
 * Requires 0 < r < 32 (a shift by 32 would be undefined).
 * Arguments are fully parenthesized so expressions such as `a | b` or `n + 1`
 * can be passed safely; note that x and r are each evaluated twice. */
#define RDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
46 static U32
RDG_rand(U32
* src
)
48 static const U32 prime1
= 2654435761U;
49 static const U32 prime2
= 2246822519U;
53 rand32
= RDG_rotl32(rand32
, 13);
58 typedef U32 fixedPoint_24_8
;
60 static void RDG_fillLiteralDistrib(BYTE
* ldt
, fixedPoint_24_8 ld
)
62 BYTE
const firstChar
= (ld
<=0.0) ? 0 : '(';
63 BYTE
const lastChar
= (ld
<=0.0) ? 255 : '}';
64 BYTE character
= (ld
<=0.0) ? 0 : '0';
68 for (u
=0; u
<LTSIZE
; ) {
69 U32
const weight
= (((LTSIZE
- u
) * ld
) >> 8) + 1;
70 U32
const end
= MIN ( u
+ weight
, LTSIZE
);
71 while (u
< end
) ldt
[u
++] = character
;
73 if (character
> lastChar
) character
= firstChar
;
78 static BYTE
RDG_genChar(U32
* seed
, const BYTE
* ldt
)
80 U32
const id
= RDG_rand(seed
) & LTMASK
;
81 return ldt
[id
]; /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. Checked : table is fully initialized */
85 static U32
RDG_rand15Bits (U32
* seedPtr
)
87 return RDG_rand(seedPtr
) & 0x7FFF;
90 static U32
RDG_randLength(U32
* seedPtr
)
92 if (RDG_rand(seedPtr
) & 7) return (RDG_rand(seedPtr
) & 0xF); /* small length */
93 return (RDG_rand(seedPtr
) & 0x1FF) + 0xF;
96 static void RDG_genBlock(void* buffer
, size_t buffSize
, size_t prefixSize
,
97 double matchProba
, const BYTE
* ldt
, U32
* seedPtr
)
99 BYTE
* const buffPtr
= (BYTE
*)buffer
;
100 U32
const matchProba32
= (U32
)(32768 * matchProba
);
101 size_t pos
= prefixSize
;
104 /* special case : sparse content */
105 while (matchProba
>= 1.0) {
106 size_t size0
= RDG_rand(seedPtr
) & 3;
107 size0
= (size_t)1 << (16 + size0
* 2);
108 size0
+= RDG_rand(seedPtr
) & (size0
-1); /* because size0 is power of 2*/
109 if (buffSize
< pos
+ size0
) {
110 memset(buffPtr
+pos
, 0, buffSize
-pos
);
113 memset(buffPtr
+pos
, 0, size0
);
115 buffPtr
[pos
-1] = RDG_genChar(seedPtr
, ldt
);
120 if (pos
==0) buffPtr
[0] = RDG_genChar(seedPtr
, ldt
), pos
=1;
122 /* Generate compressible data */
123 while (pos
< buffSize
) {
124 /* Select : Literal (char) or Match (within 32K) */
125 if (RDG_rand15Bits(seedPtr
) < matchProba32
) {
126 /* Copy (within 32K) */
127 U32
const length
= RDG_randLength(seedPtr
) + 4;
128 U32
const d
= (U32
) MIN(pos
+ length
, buffSize
);
129 U32
const repeatOffset
= (RDG_rand(seedPtr
) & 15) == 2;
130 U32
const randOffset
= RDG_rand15Bits(seedPtr
) + 1;
131 U32
const offset
= repeatOffset
? prevOffset
: (U32
) MIN(randOffset
, pos
);
132 size_t match
= pos
- offset
;
133 while (pos
< d
) { buffPtr
[pos
++] = buffPtr
[match
++]; /* correctly manages overlaps */ }
136 /* Literal (noise) */
137 U32
const length
= RDG_randLength(seedPtr
);
138 U32
const d
= (U32
) MIN(pos
+ length
, buffSize
);
139 while (pos
< d
) { buffPtr
[pos
++] = RDG_genChar(seedPtr
, ldt
); }
144 void RDG_genBuffer(void* buffer
, size_t size
, double matchProba
, double litProba
, unsigned seed
)
148 memset(ldt
, '0', sizeof(ldt
)); /* yes, character '0', this is intentional */
149 if (litProba
<=0.0) litProba
= matchProba
/ 4.5;
150 RDG_fillLiteralDistrib(ldt
, (fixedPoint_24_8
)(litProba
* 256 + 0.001));
151 RDG_genBlock(buffer
, size
, 0, matchProba
, ldt
, &seed32
);
155 void RDG_genStdout(unsigned long long size
, double matchProba
, double litProba
, unsigned seed
)
158 size_t const stdBlockSize
= 128 KB
;
159 size_t const stdDictSize
= 32 KB
;
160 BYTE
* const buff
= (BYTE
*)malloc(stdDictSize
+ stdBlockSize
);
162 BYTE ldt
[LTSIZE
]; /* literals distribution table */
165 if (buff
==NULL
) { perror("datagen"); exit(1); }
166 if (litProba
<=0.0) litProba
= matchProba
/ 4.5;
167 memset(ldt
, '0', sizeof(ldt
)); /* yes, character '0', this is intentional */
168 RDG_fillLiteralDistrib(ldt
, (fixedPoint_24_8
)(litProba
* 256 + 0.001));
169 SET_BINARY_MODE(stdout
);
171 /* Generate initial dict */
172 RDG_genBlock(buff
, stdDictSize
, 0, matchProba
, ldt
, &seed32
);
174 /* Generate compressible data */
175 while (total
< size
) {
176 size_t const genBlockSize
= (size_t) (MIN (stdBlockSize
, size
-total
));
177 RDG_genBlock(buff
, stdDictSize
+stdBlockSize
, stdDictSize
, matchProba
, ldt
, &seed32
);
178 total
+= genBlockSize
;
179 { size_t const unused
= fwrite(buff
, 1, genBlockSize
, stdout
); (void)unused
; }
181 memcpy(buff
, buff
+ stdBlockSize
, stdDictSize
);