]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /** |
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This source code is licensed under the BSD-style license found in the | |
6 | * LICENSE file in the root directory of this source tree. An additional grant | |
7 | * of patent rights can be found in the PATENTS file in the same directory. | |
8 | */ | |
9 | ||
10 | #ifndef DICTBUILDER_H_001 | |
11 | #define DICTBUILDER_H_001 | |
12 | ||
13 | #if defined (__cplusplus) | |
14 | extern "C" { | |
15 | #endif | |
16 | ||
17 | ||
18 | /*====== Dependencies ======*/ | |
19 | #include <stddef.h> /* size_t */ | |
20 | ||
21 | ||
22 | /*====== Export for Windows ======*/ | |
23 | /*! | |
24 | * ZSTD_DLL_EXPORT : | |
25 | * Define to 1 when building a Windows DLL (_WIN32 defined) so that ZDICTLIB_API expands to __declspec(dllexport). | |
26 | * In every other configuration ZDICTLIB_API expands to nothing. */ | |
27 | #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |
28 | # define ZDICTLIB_API __declspec(dllexport) | |
29 | #else | |
30 | # define ZDICTLIB_API | |
31 | #endif | |
32 | ||
33 | ||
34 | /*! ZDICT_trainFromBuffer() : | |
35 | Train a dictionary from an array of samples. | |
36 | Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |
37 | supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |
38 | The resulting dictionary will be saved into `dictBuffer`. | |
39 | @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), | |
40 | or an error code, which can be tested with ZDICT_isError(). | |
41 | Tips : In general, a reasonable dictionary has a size of ~ 100 KB. | |
42 | It's obviously possible to target smaller or larger ones, just by specifying a different `dictBufferCapacity`. | |
43 | In general, it's recommended to provide a few thousand samples, but this can vary a lot. | |
44 | It's recommended that the total size of all samples be about 100 times the target size of the dictionary. | |
45 | */ | |
46 | ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, | |
47 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); | |
48 | ||
49 | ||
50 | /*====== Helper functions ======*/ | |
51 | ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts the dictID from the dictionary in `dictBuffer`; @return : the dictID, or zero on error (not a valid dictionary) */ | |
52 | ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); /**< tests whether a size_t result returned by a ZDICT training function is an error code; presumably non-zero means error - confirm against the implementation */ | |
53 | ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); /**< presumably returns a readable name for `errorCode` - confirm against the implementation */ | |
54 | ||
55 | ||
56 | ||
57 | #ifdef ZDICT_STATIC_LINKING_ONLY | |
58 | ||
59 | /* ==================================================================================== | |
60 | * The definitions in this section are considered experimental. | |
61 | * They should never be used with a dynamic library, as they may change in the future. | |
62 | * They are provided for advanced use cases. | |
63 | * Use them only in association with static linking. | |
64 | * ==================================================================================== */ | |
65 | ||
66 | typedef struct { /* tuning parameters accepted by ZDICT_trainFromBuffer_advanced() */ | |
67 | unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ | |
68 | int compressionLevel; /* 0 means default; otherwise target a specific zstd compression level */ | |
69 | unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug */ | |
70 | unsigned dictID; /* 0 means auto mode (32-bit random value); any other value forces the dictID */ | |
71 | unsigned reserved[2]; /* reserved space for future parameters */ | |
72 | } ZDICT_params_t; | |
73 | ||
74 | ||
75 | /*! ZDICT_trainFromBuffer_advanced() : | |
76 | Same as ZDICT_trainFromBuffer() with control over more parameters. | |
77 | `parameters` is passed by value; any of its fields can be set to 0 to mean "default". | |
78 | @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), | |
79 | or an error code, which can be tested with ZDICT_isError(). | |
80 | note : ZDICT_trainFromBuffer_advanced() will send notifications to stderr if instructed to, using notificationLevel>0. | |
81 | */ | |
82 | size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, | |
83 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |
84 | ZDICT_params_t parameters); | |
85 | ||
86 | ||
87 | /*! ZDICT_addEntropyTablesFromBuffer() : | |
88 | ||
89 | Given a content-only dictionary (built using any 3rd party algorithm), | |
90 | add entropy tables computed from an array of samples. | |
91 | Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |
92 | supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |
93 | ||
94 | The input dictionary content must be stored *at the end* of `dictBuffer`. | |
95 | Its size is `dictContentSize`. | |
96 | The resulting dictionary, with entropy tables added, will be *written back to `dictBuffer`*, | |
97 | starting from its beginning. | |
98 | @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`). | |
99 | NOTE(review): presumably an error code testable with ZDICT_isError() on failure - confirm. */ | |
100 | size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, | |
101 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); | |
102 | ||
103 | ||
104 | ||
105 | #endif /* ZDICT_STATIC_LINKING_ONLY */ | |
106 | ||
107 | #if defined (__cplusplus) | |
108 | } | |
109 | #endif | |
110 | ||
111 | #endif /* DICTBUILDER_H_001 */ |