]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | #ifndef LDM_H |
2 | #define LDM_H | |
3 | ||
4 | #include "mem.h" // from /lib/common/mem.h | |
5 | ||
6 | //#include "ldm_params.h" | |
7 | ||
8 | // ============================================================================= | |
9 | // Modify the parameters in ldm_params.h if "ldm_params.h" is included. | |
10 | // Otherwise, modify the parameters here. | |
11 | // ============================================================================= | |
12 | ||
13 | #ifndef LDM_PARAMS_H | |
14 | // Defines the size of the hash table. | |
15 | // Note that this is not the number of buckets. | |
16 | // Currently this should be less than WINDOW_SIZE_LOG + 4. | |
17 | #define LDM_MEMORY_USAGE 23 | |
18 | ||
19 | // The number of entries in a hash bucket. | |
20 | #define HASH_BUCKET_SIZE_LOG 3 // The maximum is 4 for now. | |
21 | ||
22 | // Defines the lag in inserting elements into the hash table. | |
23 | #define LDM_LAG 0 | |
24 | ||
25 | // The maximum window size when searching for matches. | |
26 | // The maximum value is 30 | |
27 | #define LDM_WINDOW_SIZE_LOG 28 | |
28 | ||
29 | // The minimum match length. | |
30 | // This should be a multiple of four. | |
31 | #define LDM_MIN_MATCH_LENGTH 64 | |
32 | ||
33 | // If INSERT_BY_TAG, insert entries into the hash table as a function of the | |
34 | // hash. Certain hashes will not be inserted. | |
35 | // | |
36 | // Otherwise, insert as a function of the position. | |
37 | #define INSERT_BY_TAG 1 | |
38 | ||
39 | // Store a checksum with the hash table entries for faster comparison. | |
40 | // This halves the number of entries the hash table can contain. | |
41 | #define USE_CHECKSUM 1 | |
42 | #endif | |
43 | ||
44 | // Output compression statistics. | |
45 | #define COMPUTE_STATS | |
46 | ||
47 | // Output the configuration. | |
48 | #define OUTPUT_CONFIGURATION | |
49 | ||
50 | // If defined, forces the probability of insertion to be approximately | |
51 | // one per (1 << HASH_ONLY_EVERY_LOG). If not defined, the probability will be | |
52 | // calculated based on the memory usage and window size for "even" insertion | |
53 | // throughout the window. | |
54 | ||
55 | // #define HASH_ONLY_EVERY_LOG 8 | |
56 | ||
57 | // ============================================================================= | |
58 | ||
59 | // The number of bytes storing the compressed and decompressed size | |
60 | // in the header. | |
61 | #define LDM_COMPRESSED_SIZE 8 | |
62 | #define LDM_DECOMPRESSED_SIZE 8 | |
63 | #define LDM_HEADER_SIZE ((LDM_COMPRESSED_SIZE)+(LDM_DECOMPRESSED_SIZE)) | |
64 | ||
65 | #define ML_BITS 4 | |
66 | #define ML_MASK ((1U<<ML_BITS)-1) | |
67 | #define RUN_BITS (8-ML_BITS) | |
68 | #define RUN_MASK ((1U<<RUN_BITS)-1) | |
69 | ||
70 | // The number of bytes storing the offset. | |
71 | #define LDM_OFFSET_SIZE 4 | |
72 | ||
73 | #define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG)) | |
74 | ||
75 | // TODO: Match lengths that are too small do not use the hash table efficiently. | |
76 | // There should be a minimum hash length given the hash table size. | |
77 | #define LDM_HASH_LENGTH LDM_MIN_MATCH_LENGTH | |
78 | ||
79 | typedef struct LDM_compressStats LDM_compressStats; | |
80 | typedef struct LDM_CCtx LDM_CCtx; | |
81 | typedef struct LDM_DCtx LDM_DCtx; | |
82 | ||
83 | /** | |
84 | * Compresses src into dst. | |
85 | * Returns the compressed size if successful, 0 otherwise. | |
86 | * | |
87 | * NB: This currently ignores maxDstSize and assumes enough space is available. | |
88 | * | |
89 | * Block format (see lz4 documentation for more information): | |
90 | * github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md | |
91 | * | |
92 | * A block is composed of sequences. Each sequence begins with a token, which | |
93 | * is a one-byte value separated into two 4-bit fields. | |
94 | * | |
95 | * The first field uses the four high bits of the token and encodes the literal | |
96 | * length. If the field value is 0, there is no literal. If it is 15, | |
97 | * additional bytes are added (each ranging from 0 to 255) to the previous | |
98 | * value to produce a total length. | |
99 | * | |
100 | * Following the token and optional length bytes are the literals. | |
101 | * | |
102 | * Next are the 4 bytes representing the offset of the match (2 in lz4), | |
103 | * representing the position to copy the literals. | |
104 | * | |
105 | * The lower four bits of the token encode the match length. With additional | |
106 | * bytes added similarly to the additional literal length bytes after the offset. | |
107 | * | |
108 | * The last sequence is incomplete and stops right after the literals. | |
109 | */ | |
110 | size_t LDM_compress(const void *src, size_t srcSize, | |
111 | void *dst, size_t maxDstSize); | |
112 | ||
113 | /** | |
114 | * Initialize the compression context. | |
115 | * | |
116 | * Allocates memory for the hash table. | |
117 | * | |
118 | * Returns 0 if successful, 1 otherwise. | |
119 | */ | |
120 | size_t LDM_initializeCCtx(LDM_CCtx *cctx, | |
121 | const void *src, size_t srcSize, | |
122 | void *dst, size_t maxDstSize); | |
123 | ||
124 | /** | |
125 | * Frees up memory allocated in LDM_initializeCCtx(). | |
126 | */ | |
127 | void LDM_destroyCCtx(LDM_CCtx *cctx); | |
128 | ||
129 | /** | |
130 | * Prints the distribution of offsets in the hash table. | |
131 | * | |
132 | * The offsets are defined as the distance of the hash table entry from the | |
133 | * current input position of the cctx. | |
134 | */ | |
135 | void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx); | |
136 | ||
137 | /** | |
138 | * Outputs compression statistics to stdout. | |
139 | */ | |
140 | void LDM_printCompressStats(const LDM_compressStats *stats); | |
141 | ||
142 | /** | |
143 | * Encode the literal length followed by the literals. | |
144 | * | |
145 | * The literal length is written to the upper four bits of pToken, with | |
146 | * additional bytes written to the output as needed (see lz4). | |
147 | * | |
148 | * This is followed by literalLength bytes corresponding to the literals. | |
149 | */ | |
150 | void LDM_encodeLiteralLengthAndLiterals(LDM_CCtx *cctx, BYTE *pToken, | |
151 | const U64 literalLength); | |
152 | ||
153 | /** | |
154 | * Write current block (literals, literal length, match offset, | |
155 | * match length). | |
156 | */ | |
157 | void LDM_outputBlock(LDM_CCtx *cctx, | |
158 | const U64 literalLength, | |
159 | const U32 offset, | |
160 | const U64 matchLength); | |
161 | ||
162 | /** | |
163 | * Decompresses src into dst. | |
164 | * | |
165 | * Note: assumes src does not have a header. | |
166 | */ | |
167 | size_t LDM_decompress(const void *src, size_t srcSize, | |
168 | void *dst, size_t maxDstSize); | |
169 | ||
170 | /** | |
171 | * Initialize the decompression context. | |
172 | */ | |
173 | void LDM_initializeDCtx(LDM_DCtx *dctx, | |
174 | const void *src, size_t compressedSize, | |
175 | void *dst, size_t maxDecompressedSize); | |
176 | ||
177 | /** | |
178 | * Reads the header from src and writes the compressed size and | |
179 | * decompressed size into compressedSize and decompressedSize respectively. | |
180 | * | |
181 | * NB: LDM_compress and LDM_decompress currently do not add/read headers. | |
182 | */ | |
183 | void LDM_readHeader(const void *src, U64 *compressedSize, | |
184 | U64 *decompressedSize); | |
185 | ||
186 | /** | |
187 | * Write the compressed and decompressed size. | |
188 | */ | |
189 | void LDM_writeHeader(void *memPtr, U64 compressedSize, | |
190 | U64 decompressedSize); | |
191 | ||
192 | /** | |
193 | * Output the configuration used. | |
194 | */ | |
195 | void LDM_outputConfiguration(void); | |
196 | ||
197 | #endif /* LDM_H */ |