]> git.proxmox.com Git - ceph.git/blob - ceph/src/zstd/lib/dictBuilder/cover.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / zstd / lib / dictBuilder / cover.h
1 #include <stdio.h> /* fprintf */
2 #include <stdlib.h> /* malloc, free, qsort */
3 #include <string.h> /* memset */
4 #include <time.h> /* clock */
5 #include "mem.h" /* read */
6 #include "pool.h"
7 #include "threading.h"
8 #include "zstd_internal.h" /* includes zstd.h */
9 #ifndef ZDICT_STATIC_LINKING_ONLY
10 #define ZDICT_STATIC_LINKING_ONLY
11 #endif
12 #include "zdict.h"
13
14 /**
15 * COVER_best_t is used for two purposes:
16 * 1. Synchronizing threads.
17 * 2. Saving the best parameters and dictionary.
18 *
19 * All of the methods except COVER_best_init() are thread safe if zstd is
20 * compiled with multithreaded support.
21 */
22 typedef struct COVER_best_s {
23 ZSTD_pthread_mutex_t mutex; /* NOTE(review): presumably guards all fields below - confirm in cover.c */
24 ZSTD_pthread_cond_t cond; /* condition variable; per COVER_best_finish(), waiters are signaled when liveJobs == 0 */
25 size_t liveJobs; /* incremented by COVER_best_start(), decremented by COVER_best_finish() */
26 void *dict; /* best dictionary saved so far */
27 size_t dictSize; /* NOTE(review): presumably the size of `dict` - confirm */
28 ZDICT_cover_params_t parameters; /* parameters saved together with the best dictionary */
29 size_t compressedSize; /* NOTE(review): presumably the compressed size achieved with the best dictionary - confirm */
30 } COVER_best_t;
31
32 /**
33 * A segment is a range in the source as well as the score of the segment.
34 */
35 typedef struct {
36 U32 begin; /* start of the range in the source */
37 U32 end; /* end of the range; NOTE(review): inclusive vs. exclusive is not visible here - confirm */
38 U32 score; /* score of the segment */
39 } COVER_segment_t;
40
41 /**
42 * Number of epochs and size of each epoch.
43 */
44 typedef struct {
45 U32 num; /* number of epochs */
46 U32 size; /* size of each epoch */
47 } COVER_epoch_info_t;
48
49 /**
50 * Computes the number of epochs and the size of each epoch.
51 * We will make sure that each epoch gets at least 10 * k bytes.
52 *
53 * The COVER algorithms divide the data up into epochs of equal size and
54 * select one segment from each epoch.
55 *
56 * @param maxDictSize The maximum allowed dictionary size.
57 * @param nbDmers The number of dmers we are training on.
58 * @param k The parameter k (segment size).
59 * @param passes The target number of passes over the dmer corpus.
60 * More passes means a better dictionary.
61 */
62 COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
63 U32 k, U32 passes); /* returns the number of epochs and the size of each epoch as a COVER_epoch_info_t */
64
65 /**
66 * Warns the user when their corpus is too small.
67 */
68 void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); /* NOTE(review): displayLevel presumably controls whether the warning is shown - confirm */
69
70 /**
71 * Checks the total compressed size of a dictionary.
72 */
73 size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
74 const size_t *samplesSizes, const BYTE *samples, /* NOTE(review): presumably the per-sample sizes and the sample bytes they describe - confirm */
75 size_t *offsets, /* not const-qualified, unlike samplesSizes and samples */
76 size_t nbTrainSamples, size_t nbSamples, /* NOTE(review): relationship between the two counts is not visible here - confirm */
77 BYTE *const dict, size_t dictBufferCapacity); /* NOTE(review): return presumably is the total compressed size; error signaling is not visible here - confirm */
78
79 /**
80 * Returns the sum of the sample sizes.
81 */
82 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; /* NOTE(review): nbSamples presumably is the element count of samplesSizes - confirm */
83
84 /**
85 * Initialize the `COVER_best_t`.
86 */
87 void COVER_best_init(COVER_best_t *best); /* per the COVER_best_t notes, the one COVER_best_* method not described as thread safe */
88
89 /**
90 * Wait until liveJobs == 0.
91 */
92 void COVER_best_wait(COVER_best_t *best); /* NOTE(review): presumably blocks on best->cond while best->liveJobs != 0 - confirm */
93
94 /**
95 * Call COVER_best_wait() and then destroy the COVER_best_t.
96 */
97 void COVER_best_destroy(COVER_best_t *best); /* NOTE(review): whether best->dict is released here is not visible in this header - confirm */
98
99 /**
100 * Called when a thread is about to be launched.
101 * Increments liveJobs.
102 */
103 void COVER_best_start(COVER_best_t *best); /* counterpart of COVER_best_finish(), which decrements liveJobs */
104
105 /**
106 * Called when a thread finishes executing, on either error or success.
107 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
108 * If this dictionary is the best so far save it and its parameters.
109 */
110 void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
111 ZDICT_cover_params_t parameters, void *dict, /* NOTE(review): whether dict is copied or adopted is not visible here - confirm */
112 size_t dictSize); /* NOTE(review): "best" presumably means the smallest compressedSize - confirm */