]> git.proxmox.com Git - ceph.git/blob - ceph/src/zstd/contrib/experimental_dict_builders/fastCover/main.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / zstd / contrib / experimental_dict_builders / fastCover / main.c
1 #include <stdio.h> /* fprintf */
2 #include <stdlib.h> /* malloc, free, qsort */
3 #include <string.h> /* strcmp, strlen */
4 #include <errno.h> /* errno */
5 #include <ctype.h>
6 #include "fastCover.h"
7 #include "io.h"
8 #include "util.h"
9 #include "zdict.h"
10
11
12 /*-*************************************
13 * Console display
14 ***************************************/
/* DISPLAY() : printf-style output on stderr. */
#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL() : emits the message only when the `displayLevel` variable
 * visible at the call site is >= `l`.
 * The do/while(0) wrapper makes the expansion a single statement, so the macro
 * followed by ';' is safe inside an unbraced if/else. */
#define DISPLAYLEVEL(l, ...) \
    do { if (displayLevel >= (l)) { DISPLAY(__VA_ARGS__); } } while (0)
17
18 static const U64 g_refreshRate = SEC_TO_MICRO / 6;
19 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
20
21 #define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
22 if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
23 { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
24 if (displayLevel>=4) fflush(stderr); } } }
25
26
27 /*-*************************************
28 * Exceptions
29 ***************************************/
/* DEBUG : set to a non-zero value at build time to get extra diagnostics. */
#ifndef DEBUG
#  define DEBUG 0
#endif
/* DEBUGOUTPUT() : DISPLAY() that is active only when DEBUG is non-zero.
 * Expands to a single statement (no embedded ';'), safe in unbraced if/else. */
#define DEBUGOUTPUT(...) do { if (DEBUG) { DISPLAY(__VA_ARGS__); } } while (0)
/* EXM_THROW() : prints "Error <error> : <message>" on stderr, then terminates the
 * process with exit(error). In DEBUG builds the source location is printed first.
 * Wrapped in do/while(0) so that `if (cond) EXM_THROW(...); else ...` is valid. */
#define EXM_THROW(error, ...)                                                   \
    do {                                                                        \
        DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__);   \
        DISPLAY("Error %i : ", error);                                          \
        DISPLAY(__VA_ARGS__);                                                   \
        DISPLAY("\n");                                                          \
        exit(error);                                                            \
    } while (0)
42
43
44 /*-*************************************
45 * Constants
46 ***************************************/
47 static const unsigned g_defaultMaxDictSize = 110 KB;
48 #define DEFAULT_CLEVEL 3
49
50
51 /*-*************************************
52 * FASTCOVER
53 ***************************************/
54 int FASTCOVER_trainFromFiles(const char* dictFileName, sampleInfo *info,
55 unsigned maxDictSize,
56 ZDICT_fastCover_params_t *params) {
57 unsigned const displayLevel = params->zParams.notificationLevel;
58 void* const dictBuffer = malloc(maxDictSize);
59
60 int result = 0;
61
62 /* Checks */
63 if (!dictBuffer)
64 EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
65
66 { size_t dictSize;
67 /* Run the optimize version if either k or d is not provided */
68 if (!params->d || !params->k) {
69 dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
70 info->samplesSizes, info->nbSamples, params);
71 } else {
72 dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
73 info->samplesSizes, info->nbSamples, *params);
74 }
75 DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint*100));
76 if (ZDICT_isError(dictSize)) {
77 DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
78 result = 1;
79 goto _done;
80 }
81 /* save dict */
82 DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
83 saveDict(dictFileName, dictBuffer, dictSize);
84 }
85
86 /* clean up */
87 _done:
88 free(dictBuffer);
89 return result;
90 }
91
92
93
94 int main(int argCount, const char* argv[])
95 {
96 int displayLevel = 2;
97 const char* programName = argv[0];
98 int operationResult = 0;
99
100 /* Initialize arguments to default values */
101 unsigned k = 0;
102 unsigned d = 0;
103 unsigned f = 23;
104 unsigned steps = 32;
105 unsigned nbThreads = 1;
106 unsigned split = 100;
107 const char* outputFile = "fastCoverDict";
108 unsigned dictID = 0;
109 unsigned maxDictSize = g_defaultMaxDictSize;
110
111 /* Initialize table to store input files */
112 const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
113 unsigned filenameIdx = 0;
114
115 char* fileNamesBuf = NULL;
116 unsigned fileNamesNb = filenameIdx;
117 int followLinks = 0; /* follow directory recursively */
118 const char** extendedFileList = NULL;
119
120 /* Parse arguments */
121 for (int i = 1; i < argCount; i++) {
122 const char* argument = argv[i];
123 if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
124 if (longCommandWArg(&argument, "d=")) { d = readU32FromChar(&argument); continue; }
125 if (longCommandWArg(&argument, "f=")) { f = readU32FromChar(&argument); continue; }
126 if (longCommandWArg(&argument, "steps=")) { steps = readU32FromChar(&argument); continue; }
127 if (longCommandWArg(&argument, "split=")) { split = readU32FromChar(&argument); continue; }
128 if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
129 if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
130 if (longCommandWArg(&argument, "in=")) {
131 filenameTable[filenameIdx] = argument;
132 filenameIdx++;
133 continue;
134 }
135 if (longCommandWArg(&argument, "out=")) {
136 outputFile = argument;
137 continue;
138 }
139 DISPLAYLEVEL(1, "Incorrect parameters\n");
140 operationResult = 1;
141 return operationResult;
142 }
143
144 /* Get the list of all files recursively (because followLinks==0)*/
145 extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
146 &fileNamesNb, followLinks);
147 if (extendedFileList) {
148 unsigned u;
149 for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
150 free((void*)filenameTable);
151 filenameTable = extendedFileList;
152 filenameIdx = fileNamesNb;
153 }
154
155 size_t blockSize = 0;
156
157 /* Set up zParams */
158 ZDICT_params_t zParams;
159 zParams.compressionLevel = DEFAULT_CLEVEL;
160 zParams.notificationLevel = displayLevel;
161 zParams.dictID = dictID;
162
163 /* Set up fastCover params */
164 ZDICT_fastCover_params_t params;
165 params.zParams = zParams;
166 params.k = k;
167 params.d = d;
168 params.f = f;
169 params.steps = steps;
170 params.nbThreads = nbThreads;
171 params.splitPoint = (double)split/100;
172
173 /* Build dictionary */
174 sampleInfo* info = getSampleInfo(filenameTable,
175 filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
176 operationResult = FASTCOVER_trainFromFiles(outputFile, info, maxDictSize, &params);
177
178 /* Free allocated memory */
179 UTIL_freeFileList(extendedFileList, fileNamesBuf);
180 freeSampleInfo(info);
181
182 return operationResult;
183 }