]> git.proxmox.com Git - ceph.git/blame - ceph/src/zstd/contrib/long_distance_matching/ldm.h
update download target update for octopus release
[ceph.git] / ceph / src / zstd / contrib / long_distance_matching / ldm.h
CommitLineData
11fdf7f2
TL
1#ifndef LDM_H
2#define LDM_H
3
4#include "mem.h" // from /lib/common/mem.h
5
6//#include "ldm_params.h"
7
8// =============================================================================
9// Modify the parameters in ldm_params.h if "ldm_params.h" is included.
10// Otherwise, modify the parameters here.
11// =============================================================================
12
13#ifndef LDM_PARAMS_H
14 // Defines the size of the hash table.
15 // Note that this is not the number of buckets.
16 // Currently this should be less than WINDOW_SIZE_LOG + 4.
17 #define LDM_MEMORY_USAGE 23
18
19 // The number of entries in a hash bucket.
20 #define HASH_BUCKET_SIZE_LOG 3 // The maximum is 4 for now.
21
22 // Defines the lag in inserting elements into the hash table.
23 #define LDM_LAG 0
24
25 // The maximum window size when searching for matches.
26 // The maximum value is 30
27 #define LDM_WINDOW_SIZE_LOG 28
28
29 // The minimum match length.
30 // This should be a multiple of four.
31 #define LDM_MIN_MATCH_LENGTH 64
32
33 // If INSERT_BY_TAG, insert entries into the hash table as a function of the
34 // hash. Certain hashes will not be inserted.
35 //
36 // Otherwise, insert as a function of the position.
37 #define INSERT_BY_TAG 1
38
39 // Store a checksum with the hash table entries for faster comparison.
40 // This halves the number of entries the hash table can contain.
41 #define USE_CHECKSUM 1
42#endif
43
44// Output compression statistics.
45#define COMPUTE_STATS
46
47// Output the configuration.
48#define OUTPUT_CONFIGURATION
49
50// If defined, forces the probability of insertion to be approximately
51// one per (1 << HASH_ONLY_EVERY_LOG). If not defined, the probability will be
52// calculated based on the memory usage and window size for "even" insertion
53// throughout the window.
54
55// #define HASH_ONLY_EVERY_LOG 8
56
57// =============================================================================
58
59// The number of bytes storing the compressed and decompressed size
60// in the header.
61#define LDM_COMPRESSED_SIZE 8
62#define LDM_DECOMPRESSED_SIZE 8
63#define LDM_HEADER_SIZE ((LDM_COMPRESSED_SIZE)+(LDM_DECOMPRESSED_SIZE))
64
65#define ML_BITS 4
66#define ML_MASK ((1U<<ML_BITS)-1)
67#define RUN_BITS (8-ML_BITS)
68#define RUN_MASK ((1U<<RUN_BITS)-1)
69
70// The number of bytes storing the offset.
71#define LDM_OFFSET_SIZE 4
72
73#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
74
75// TODO: Match lengths that are too small do not use the hash table efficiently.
76// There should be a minimum hash length given the hash table size.
77#define LDM_HASH_LENGTH LDM_MIN_MATCH_LENGTH
78
79typedef struct LDM_compressStats LDM_compressStats;
80typedef struct LDM_CCtx LDM_CCtx;
81typedef struct LDM_DCtx LDM_DCtx;
82
83/**
84 * Compresses src into dst.
85 * Returns the compressed size if successful, 0 otherwise.
86 *
87 * NB: This currently ignores maxDstSize and assumes enough space is available.
88 *
89 * Block format (see lz4 documentation for more information):
90 * github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
91 *
92 * A block is composed of sequences. Each sequence begins with a token, which
93 * is a one-byte value separated into two 4-bit fields.
94 *
95 * The first field uses the four high bits of the token and encodes the literal
96 * length. If the field value is 0, there is no literal. If it is 15,
97 * additional bytes are added (each ranging from 0 to 255) to the previous
98 * value to produce a total length.
99 *
100 * Following the token and optional length bytes are the literals.
101 *
102 * Next are the 4 bytes representing the offset of the match (2 in lz4),
103 * representing the position to copy the literals.
104 *
105 * The lower four bits of the token encode the match length. With additional
106 * bytes added similarly to the additional literal length bytes after the offset.
107 *
108 * The last sequence is incomplete and stops right after the literals.
109 */
110size_t LDM_compress(const void *src, size_t srcSize,
111 void *dst, size_t maxDstSize);
112
113/**
114 * Initialize the compression context.
115 *
116 * Allocates memory for the hash table.
117 *
118 * Returns 0 if successful, 1 otherwise.
119 */
120size_t LDM_initializeCCtx(LDM_CCtx *cctx,
121 const void *src, size_t srcSize,
122 void *dst, size_t maxDstSize);
123
124/**
125 * Frees up memory allocated in LDM_initializeCCtx().
126 */
127void LDM_destroyCCtx(LDM_CCtx *cctx);
128
129/**
130 * Prints the distribution of offsets in the hash table.
131 *
132 * The offsets are defined as the distance of the hash table entry from the
133 * current input position of the cctx.
134 */
135void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx);
136
137/**
138 * Outputs compression statistics to stdout.
139 */
140void LDM_printCompressStats(const LDM_compressStats *stats);
141
142/**
143 * Encode the literal length followed by the literals.
144 *
145 * The literal length is written to the upper four bits of pToken, with
146 * additional bytes written to the output as needed (see lz4).
147 *
148 * This is followed by literalLength bytes corresponding to the literals.
149 */
150void LDM_encodeLiteralLengthAndLiterals(LDM_CCtx *cctx, BYTE *pToken,
151 const U64 literalLength);
152
153/**
154 * Write current block (literals, literal length, match offset,
155 * match length).
156 */
157void LDM_outputBlock(LDM_CCtx *cctx,
158 const U64 literalLength,
159 const U32 offset,
160 const U64 matchLength);
161
162/**
163 * Decompresses src into dst.
164 *
165 * Note: assumes src does not have a header.
166 */
167size_t LDM_decompress(const void *src, size_t srcSize,
168 void *dst, size_t maxDstSize);
169
170/**
171 * Initialize the decompression context.
172 */
173void LDM_initializeDCtx(LDM_DCtx *dctx,
174 const void *src, size_t compressedSize,
175 void *dst, size_t maxDecompressedSize);
176
177/**
178 * Reads the header from src and writes the compressed size and
179 * decompressed size into compressedSize and decompressedSize respectively.
180 *
181 * NB: LDM_compress and LDM_decompress currently do not add/read headers.
182 */
183void LDM_readHeader(const void *src, U64 *compressedSize,
184 U64 *decompressedSize);
185
186/**
187 * Write the compressed and decompressed size.
188 */
189void LDM_writeHeader(void *memPtr, U64 compressedSize,
190 U64 decompressedSize);
191
192/**
193 * Output the configuration used.
194 */
195void LDM_outputConfiguration(void);
196
197#endif /* LDM_H */