1 #include <stdio.h> /* fprintf */
2 #include <stdlib.h> /* malloc, free, qsort */
3 #include <string.h> /* memset */
4 #include <time.h> /* clock */
5 #include "mem.h" /* read */
8 #include "zstd_internal.h" /* includes zstd.h */
9 #ifndef ZDICT_STATIC_LINKING_ONLY
10 #define ZDICT_STATIC_LINKING_ONLY
16 unsigned k
; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
17 unsigned d
; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
18 unsigned f
; /* log of size of frequency array */
19 unsigned steps
; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
20 unsigned nbThreads
; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
21 double splitPoint
; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
22 ZDICT_params_t zParams
;
23 } ZDICT_fastCover_params_t
;
26 /*! ZDICT_optimizeTrainFromBuffer_fastCover():
27 * Train a dictionary from an array of samples using a modified version of the COVER algorithm.
28 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
29 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
30 * The resulting dictionary will be saved into `dictBuffer`.
31 * All of the parameters except for f are optional.
32 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
33 * If steps is zero, its default value is used.
34 * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [16, 2048].
36 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
37 * or an error code, which can be tested with ZDICT_isError().
38 * On success `*parameters` contains the parameters selected.
 */
/* Prototype of the parameter-searching trainer: builds a dictionary from the samples and reports the parameters it selected through `*parameters`. Returns the size of the dictionary stored into `dictBuffer`, or an error code that can be tested with ZDICT_isError(). */
40 ZDICTLIB_API
size_t ZDICT_optimizeTrainFromBuffer_fastCover(
/* dictBuffer: output buffer; the resulting dictionary is saved here. */
41 void *dictBuffer
/* dictBufferCapacity: upper bound on the returned dictionary size (result is <= dictBufferCapacity). */
, size_t dictBufferCapacity
/* samplesBuffer: all samples stored concatenated in a single flat buffer. */
, const void *samplesBuffer
,
/* samplesSizes: size of each sample, in the same order as they appear in samplesBuffer. */
42 const size_t *samplesSizes
/* nbSamples: presumably the number of entries in samplesSizes -- TODO confirm against the implementation. */
, unsigned nbSamples
,
/* parameters: in/out. f must be set by the caller; the other fields are optional (d == 0 or k == 0 requests a search over several values, steps == 0 selects the default). On success it holds the parameters selected. */
43 ZDICT_fastCover_params_t
*parameters
);
46 /*! ZDICT_trainFromBuffer_fastCover():
47 * Train a dictionary from an array of samples using a modified version of the COVER algorithm.
48 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
49 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
50 * The resulting dictionary will be saved into `dictBuffer`.
51 * d, k, and f are required.
52 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
53 * or an error code, which can be tested with ZDICT_isError().
 */
/* Prototype of the fixed-parameter trainer: builds a dictionary from the samples using exactly the parameters supplied by the caller. Returns the size of the dictionary stored into `dictBuffer`, or an error code that can be tested with ZDICT_isError(). */
55 ZDICTLIB_API
size_t ZDICT_trainFromBuffer_fastCover(
/* dictBuffer: output buffer; the resulting dictionary is saved here. */
56 void *dictBuffer
/* dictBufferCapacity: upper bound on the returned dictionary size (result is <= dictBufferCapacity). */
, size_t dictBufferCapacity
/* samplesBuffer: all samples stored concatenated in a single flat buffer. */
, const void *samplesBuffer
,
/* samplesSizes: size of each sample, in the same order as they appear in samplesBuffer. */
57 const size_t *samplesSizes
/* nbSamples: presumably the number of entries in samplesSizes -- TODO confirm against the implementation. */
, unsigned nbSamples
/* parameters: passed by value, so nothing is reported back through it; d, k, and f are required. */
, ZDICT_fastCover_params_t parameters
);