]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | #include <stdio.h> /* fprintf */ |
2 | #include <stdlib.h> /* malloc, free, qsort */ | |
3 | #include <string.h> /* memset */ | |
4 | #include <time.h> /* clock */ | |
5 | #include "mem.h" /* read */ | |
6 | #include "pool.h" | |
7 | #include "threading.h" | |
8 | #include "zstd_internal.h" /* includes zstd.h */ | |
9 | #ifndef ZDICT_STATIC_LINKING_ONLY | |
10 | #define ZDICT_STATIC_LINKING_ONLY | |
11 | #endif | |
12 | #include "zdict.h" | |
13 | ||
14 | ||
15 | typedef struct { | |
16 | unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ | |
17 | unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ | |
18 | unsigned f; /* log of size of frequency array */ | |
19 | unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */ | |
20 | unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ | |
21 | double splitPoint; /* Fraction of samples used for training: the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ | |
22 | ZDICT_params_t zParams; | |
23 | } ZDICT_fastCover_params_t; | |
24 | ||
25 | ||
26 | /*! ZDICT_optimizeTrainFromBuffer_fastCover(): | |
27 | * Train a dictionary from an array of samples using a modified version of the COVER algorithm. | |
28 | * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |
29 | * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |
30 | * The resulting dictionary will be saved into `dictBuffer`. | |
31 | * All of the parameters except for f are optional. | |
32 | * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. | |
33 | * If steps is zero, it defaults to its default value. | |
34 | * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [16, 2048]. | |
35 | * | |
36 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |
37 | * or an error code, which can be tested with ZDICT_isError(). | |
38 | * On success `*parameters` contains the parameters selected. | |
39 | */ | |
40 | ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover( | |
41 | void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, | |
42 | const size_t *samplesSizes, unsigned nbSamples, | |
43 | ZDICT_fastCover_params_t *parameters); | |
44 | ||
45 | ||
46 | /*! ZDICT_trainFromBuffer_fastCover(): | |
47 | * Train a dictionary from an array of samples using a modified version of the COVER algorithm. | |
48 | * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |
49 | * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |
50 | * The resulting dictionary will be saved into `dictBuffer`. | |
51 | * The d, k, and f fields of `parameters` are required. | |
52 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |
53 | * or an error code, which can be tested with ZDICT_isError(). | |
54 | */ | |
55 | ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover( | |
56 | void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, | |
57 | const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters); |