1 #include <stdio.h> /* fprintf */
2 #include <stdlib.h> /* malloc, free, qsort */
3 #include <string.h> /* strcmp, strlen */
4 #include <errno.h> /* errno */
12 /*-*************************************
14 ***************************************/
/* DISPLAY(...): printf-style message written to stderr. */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL(l, ...): emit the message only when the `displayLevel`
 * variable in scope at the call site is at least `l`.
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * `if (c) DISPLAYLEVEL(...); else ...` now parses, and the `else` binds to
 * the caller's `if` instead of the macro's internal one.
 * `l` is parenthesized so that expression arguments (e.g. `a ? 1 : 2`)
 * are compared as a whole. Call sites must end with a semicolon. */
#define DISPLAYLEVEL(l, ...) \
    do { if (displayLevel >= (l)) { DISPLAY(__VA_ARGS__); } } while (0)
/* Minimum clock span between two throttled progress messages: one sixth of
 * SEC_TO_MICRO (presumably microseconds per second, i.e. about six refreshes
 * per second - confirm against the definition of SEC_TO_MICRO). */
18 static const U64 g_refreshRate
= SEC_TO_MICRO
/ 6;
/* Time of the most recent throttled message; read and reset by DISPLAYUPDATE. */
19 static UTIL_time_t g_displayClock
= UTIL_TIME_INITIALIZER
;
/* DISPLAYUPDATE(l, ...): rate-limited variant of DISPLAYLEVEL.
 * The message is written when the caller's `displayLevel` is at least `l`
 * AND either more than g_refreshRate has elapsed since g_displayClock or
 * displayLevel is 4 or higher (no throttling at that verbosity).
 * Each emitted message resets g_displayClock; at displayLevel >= 4 stderr
 * is also flushed after every message.
 * NOTE(review): expands to a bare `{ ... }` block, so `DISPLAYUPDATE(...);`
 * directly followed by `else` will not parse, and `l` is not parenthesized.
 * A do { } while (0) wrapper would fix this once all call sites are checked. */
21 #define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
22 if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
23 { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
24 if (displayLevel>=4) fflush(stderr); } } }
27 /*-*************************************
29 ***************************************/
/* DEBUGOUTPUT(...): forwards its arguments to DISPLAY when DEBUG is non-zero.
 * NOTE(review): expands to an unbraced `if` that already ends with `;`, so a
 * call written as `DEBUGOUTPUT(...);` produces an extra empty statement and
 * cannot be followed by `else`. */
33 #define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
/* EXM_THROW(error, ...): reports an error on stderr.
 * Visible steps: an optional debug line giving __FILE__/__LINE__ of the
 * expansion site, then "Error <error> : " followed by the caller's
 * printf-style message.
 * NOTE(review): this listing is truncated - the line after the macro header
 * and everything after the last DISPLAY are missing, and the final visible
 * line still ends with a continuation backslash. How the macro terminates
 * (presumably by exiting with `error`) must be confirmed in the full file. */
34 #define EXM_THROW(error, ...) \
36 DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
37 DISPLAY("Error %i : ", error); \
38 DISPLAY(__VA_ARGS__); \
44 /*-*************************************
46 ***************************************/
/* Default upper bound on the dictionary size; main() uses it to initialise
 * maxDictSize, which the maxdict= argument may overwrite.
 * `KB` is a postfix macro defined elsewhere (presumably a multiplication by
 * 1024 - confirm). */
47 static const unsigned g_defaultMaxDictSize
= 110 KB
;
/* Compression level that main() stores into ZDICT_params_t.compressionLevel. */
48 #define DEFAULT_CLEVEL 3
51 /*-*************************************
53 ***************************************/
/* FASTCOVER_trainFromFiles():
 * Trains a fastCover dictionary from the samples described by `info` and
 * hands the result to saveDict() under the name `dictFileName`.
 *
 *  dictFileName : output name passed to saveDict().
 *  info         : sample set; srcBuffer, samplesSizes and nbSamples are read.
 *  params       : fastCover parameters. When d or k is zero the optimizing
 *                 trainer receives this pointer; otherwise the plain trainer
 *                 receives a copy (*params). zParams.notificationLevel
 *                 controls how much is printed here.
 *
 * NOTE(review): this listing has gaps. `maxDictSize` and `dictSize` are used
 * below but their declarations are not visible (main() passes maxDictSize as
 * a third argument, so a parameter line is presumably missing). The
 * allocation check, the `else`, the failure path, the cleanup of dictBuffer
 * and the return statement are not visible either - confirm in the full file
 * before relying on any of them. */
54 int FASTCOVER_trainFromFiles(const char* dictFileName
, sampleInfo
*info
,
56 ZDICT_fastCover_params_t
*params
) {
/* Verbosity threshold read by the DISPLAYLEVEL macro calls below. */
57 unsigned const displayLevel
= params
->zParams
.notificationLevel
;
/* Destination buffer for the trained dictionary, maxDictSize bytes. */
58 void* const dictBuffer
= malloc(maxDictSize
);
/* NOTE(review): the condition guarding this throw is not visible;
 * presumably `if (!dictBuffer)` - confirm. */
64 EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
67 /* Run the optimize version if either k or d is not provided */
68 if (!params
->d
|| !params
->k
) {
/* Optimizing trainer: takes the parameter struct by pointer. */
69 dictSize
= ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer
, maxDictSize
, info
->srcBuffer
,
70 info
->samplesSizes
, info
->nbSamples
, params
);
/* Plain trainer: both k and d were supplied; parameters passed by value.
 * NOTE(review): the `} else {` separating the two calls is not visible. */
72 dictSize
= ZDICT_trainFromBuffer_fastCover(dictBuffer
, maxDictSize
, info
->srcBuffer
,
73 info
->samplesSizes
, info
->nbSamples
, *params
);
/* Report the parameters in effect; splitPoint is printed as a percentage. */
75 DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params
->k
, params
->d
, params
->f
, params
->steps
, (unsigned)(params
->splitPoint
*100));
/* dictSize carries either the dictionary size or a ZDICT error code. */
76 if (ZDICT_isError(dictSize
)) {
77 DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize
)); /* should not happen */
/* Success path: announce the size (cast to U32 for %u) and save the dictionary.
 * NOTE(review): the end of the error branch above is not visible. */
82 DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32
)dictSize
, dictFileName
);
83 saveDict(dictFileName
, dictBuffer
, dictSize
);
/* main():
 * Command-line driver. Visible steps:
 *   1. set defaults and allocate a table with one slot per argv entry;
 *   2. parse arguments of the form k=, d=, f=, steps=, split=, dictID=,
 *      maxdict=, in= and out=; anything else prints "Incorrect parameters"
 *      and returns;
 *   3. expand the collected in= entries through UTIL_createFileList();
 *   4. fill ZDICT_params_t / ZDICT_fastCover_params_t;
 *   5. load samples with getSampleInfo() and call FASTCOVER_trainFromFiles();
 *   6. release the file list and the sample info, then return the result of
 *      FASTCOVER_trainFromFiles().
 *
 * NOTE(review): this listing has gaps. The declarations of displayLevel, k,
 * d, f, steps, dictID and u, the ends of the in=/out= branches, several
 * closing braces and the assignments of params.k/d/f are not visible.
 * Comments below describe only what is shown. */
94 int main(int argCount
, const char* argv
[])
97 const char* programName
= argv
[0];
98 int operationResult
= 0;
100 /* Initialize arguments to default values */
105 unsigned nbThreads
= 1;
/* Percentage; converted to params.splitPoint as split/100 further down. */
106 unsigned split
= 100;
107 const char* outputFile
= "fastCoverDict";
109 unsigned maxDictSize
= g_defaultMaxDictSize
;
111 /* Initialize table to store input files */
/* One slot per argv entry, so it can hold every possible in= argument.
 * NOTE(review): no NULL check on this malloc is visible before the table is
 * written in the parsing loop - confirm and add one if missing. */
112 const char** filenameTable
= (const char**)malloc(argCount
* sizeof(const char*));
113 unsigned filenameIdx
= 0;
/* Outputs of UTIL_createFileList(); both are released via UTIL_freeFileList(). */
115 char* fileNamesBuf
= NULL
;
116 unsigned fileNamesNb
= filenameIdx
;
117 int followLinks
= 0; /* follow directory recursively */
118 const char** extendedFileList
= NULL
;
120 /* Parse arguments */
/* Each test calls longCommandWArg() with a prefix; on a match the value is
 * read from `argument` (presumably advanced past the prefix - confirm in
 * longCommandWArg) and the loop moves on to the next argv entry. */
121 for (int i
= 1; i
< argCount
; i
++) {
122 const char* argument
= argv
[i
];
123 if (longCommandWArg(&argument
, "k=")) { k
= readU32FromChar(&argument
); continue; }
124 if (longCommandWArg(&argument
, "d=")) { d
= readU32FromChar(&argument
); continue; }
125 if (longCommandWArg(&argument
, "f=")) { f
= readU32FromChar(&argument
); continue; }
126 if (longCommandWArg(&argument
, "steps=")) { steps
= readU32FromChar(&argument
); continue; }
127 if (longCommandWArg(&argument
, "split=")) { split
= readU32FromChar(&argument
); continue; }
128 if (longCommandWArg(&argument
, "dictID=")) { dictID
= readU32FromChar(&argument
); continue; }
129 if (longCommandWArg(&argument
, "maxdict=")) { maxDictSize
= readU32FromChar(&argument
); continue; }
/* in=: remember the path; it points into argv, nothing is copied.
 * NOTE(review): the increment of filenameIdx and the end of this branch are
 * not visible. */
130 if (longCommandWArg(&argument
, "in=")) {
131 filenameTable
[filenameIdx
] = argument
;
/* out=: overrides the default output name "fastCoverDict". */
135 if (longCommandWArg(&argument
, "out=")) {
136 outputFile
= argument
;
/* No known prefix matched.
 * NOTE(review): this return happens while filenameTable is still allocated;
 * no free() is visible on this path. */
139 DISPLAYLEVEL(1, "Incorrect parameters\n");
141 return operationResult
;
144 /* Get the list of all files recursively (because followLinks==0)*/
145 extendedFileList
= UTIL_createFileList(filenameTable
, filenameIdx
, &fileNamesBuf
,
146 &fileNamesNb
, followLinks
);
/* On success the expanded list replaces the argv-based table, which is freed.
 * NOTE(review): when extendedFileList is NULL, no free of filenameTable is
 * visible anywhere below. */
147 if (extendedFileList
) {
149 for (u
=0; u
<fileNamesNb
; u
++) DISPLAYLEVEL(4, "%u %s\n", u
, extendedFileList
[u
]);
150 free((void*)filenameTable
);
151 filenameTable
= extendedFileList
;
152 filenameIdx
= fileNamesNb
;
/* Passed unchanged to getSampleInfo(); its meaning is defined there. */
155 size_t blockSize
= 0;
/* Generic dictionary parameters; only the three fields below are assigned
 * in the visible code. */
158 ZDICT_params_t zParams
;
159 zParams
.compressionLevel
= DEFAULT_CLEVEL
;
160 zParams
.notificationLevel
= displayLevel
;
161 zParams
.dictID
= dictID
;
163 /* Set up fastCover params */
/* NOTE(review): FASTCOVER_trainFromFiles() reads params.k, params.d and
 * params.f, but their assignments are not visible in this listing. */
164 ZDICT_fastCover_params_t params
;
165 params
.zParams
= zParams
;
169 params
.steps
= steps
;
170 params
.nbThreads
= nbThreads
;
/* Percentage to fraction; the cast forces floating-point division. */
171 params
.splitPoint
= (double)split
/100;
173 /* Build dictionary */
/* NOTE(review): the result of getSampleInfo() is used without a visible
 * NULL check. */
174 sampleInfo
* info
= getSampleInfo(filenameTable
,
175 filenameIdx
, blockSize
, maxDictSize
, zParams
.notificationLevel
);
/* NOTE(review): the pilcrow sequence in the last argument looks like a
 * mis-encoded address-of `params` (HTML entity damage); it will not compile
 * as written - confirm and restore. */
176 operationResult
= FASTCOVER_trainFromFiles(outputFile
, info
, maxDictSize
, ¶ms
);
178 /* Free allocated memory */
179 UTIL_freeFileList(extendedFileList
, fileNamesBuf
);
180 freeSampleInfo(info
);
182 return operationResult
;