]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/filestore/HashIndex.h
update sources to v12.1.1
[ceph.git] / ceph / src / os / filestore / HashIndex.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef CEPH_HASHINDEX_H
16#define CEPH_HASHINDEX_H
17
18#include "include/buffer_fwd.h"
19#include "include/encoding.h"
20#include "LFNIndex.h"
21
22extern string reverse_hexdigit_bits_string(string l);
23
24/**
25 * Implements collection prehashing.
26 *
27 * @verbatim
28 * (root) - 0 - 0
29 * - 1
30 * - E
31 * - 1
32 * - 2 - D - 0
33 * .
34 * .
35 * .
36 * - F - 0
37 * @endverbatim
38 *
39 * A file is located at the longest existing directory from the root
40 * given by the hex characters in the hash beginning with the least
41 * significant.
42 *
43 * ex: ghobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2)
44 * would be located in (root)/2/D/0/
45 *
224ce89b
WB
46 * Subdirectories are created when the number of objects in a
47 * directory exceed 16 * (abs(merge_threshhold)) * split_multiplier +
48 * split_rand_factor). The number of objects in a directory is encoded
49 * as subdir_info_s in an xattr on the directory.
7c673cae
FG
50 */
51class HashIndex : public LFNIndex {
52private:
53 /// Attribute name for storing subdir info @see subdir_info_s
54 static const string SUBDIR_ATTR;
224ce89b
WB
55 /// Attribute name for storing index-wide settings
56 static const string SETTINGS_ATTR;
7c673cae
FG
57 /// Attribute name for storing in progress op tag
58 static const string IN_PROGRESS_OP_TAG;
59 /// Size (bits) in object hash
60 static const int PATH_HASH_LEN = 32;
61 /// Max length of hashed path
62 static const int MAX_HASH_LEVEL = (PATH_HASH_LEN/4);
63
64 /**
65 * Merges occur when the number of object drops below
66 * merge_threshold and splits occur when the number of objects
224ce89b
WB
67 * exceeds:
68 *
69 * 16 * (abs(merge_threshold) * split_multiplier + split_rand_factor)
70 *
71 * Please note if merge_threshold is less than zero, it will never
72 * do merging
7c673cae
FG
73 */
74 int merge_threshold;
75 int split_multiplier;
76
77 /// Encodes current subdir state for determining when to split/merge.
78 struct subdir_info_s {
79 uint64_t objs; ///< Objects in subdir.
80 uint32_t subdirs; ///< Subdirs in subdir.
81 uint32_t hash_level; ///< Hashlevel of subdir.
82
83 subdir_info_s() : objs(0), subdirs(0), hash_level(0) {}
84
85 void encode(bufferlist &bl) const
86 {
87 __u8 v = 1;
88 ::encode(v, bl);
89 ::encode(objs, bl);
90 ::encode(subdirs, bl);
91 ::encode(hash_level, bl);
92 }
93
94 void decode(bufferlist::iterator &bl)
95 {
96 __u8 v;
97 ::decode(v, bl);
98 assert(v == 1);
99 ::decode(objs, bl);
100 ::decode(subdirs, bl);
101 ::decode(hash_level, bl);
102 }
103 };
104
224ce89b
WB
105 struct settings_s {
106 uint32_t split_rand_factor; ///< random factor added to split threshold (only on root of collection)
107 settings_s() : split_rand_factor(0) {}
108 void encode(bufferlist &bl) const
109 {
110 __u8 v = 1;
111 ::encode(v, bl);
112 ::encode(split_rand_factor, bl);
113 }
114 void decode(bufferlist::iterator &bl)
115 {
116 __u8 v;
117 ::decode(v, bl);
118 ::decode(split_rand_factor, bl);
119 }
120 } settings;
121
7c673cae
FG
122 /// Encodes in progress split or merge
123 struct InProgressOp {
124 static const int SPLIT = 0;
125 static const int MERGE = 1;
126 static const int COL_SPLIT = 2;
127 int op;
128 vector<string> path;
129
130 InProgressOp(int op, const vector<string> &path)
131 : op(op), path(path) {}
132
133 explicit InProgressOp(bufferlist::iterator &bl) {
134 decode(bl);
135 }
136
137 bool is_split() const { return op == SPLIT; }
138 bool is_col_split() const { return op == COL_SPLIT; }
139 bool is_merge() const { return op == MERGE; }
140
141 void encode(bufferlist &bl) const {
142 __u8 v = 1;
143 ::encode(v, bl);
144 ::encode(op, bl);
145 ::encode(path, bl);
146 }
147
148 void decode(bufferlist::iterator &bl) {
149 __u8 v;
150 ::decode(v, bl);
151 assert(v == 1);
152 ::decode(op, bl);
153 ::decode(path, bl);
154 }
155 };
156
157
158public:
159 /// Constructor.
160 HashIndex(
161 CephContext* cct,
162 coll_t collection, ///< [in] Collection
163 const char *base_path, ///< [in] Path to the index root.
164 int merge_at, ///< [in] Merge threshhold.
165 int split_multiple, ///< [in] Split threshhold.
166 uint32_t index_version,///< [in] Index version
167 double retry_probability=0) ///< [in] retry probability
168 : LFNIndex(cct, collection, base_path, index_version, retry_probability),
169 merge_threshold(merge_at),
224ce89b
WB
170 split_multiplier(split_multiple)
171 {}
172
173 int read_settings() override;
7c673cae
FG
174
175 /// @see CollectionIndex
176 uint32_t collection_version() override { return index_version; }
177
178 /// @see CollectionIndex
179 int cleanup() override;
180
181 /// @see CollectionIndex
182 int prep_delete() override;
183
184 /// @see CollectionIndex
185 int _split(
186 uint32_t match,
187 uint32_t bits,
188 CollectionIndex* dest
189 ) override;
190
191 /// @see CollectionIndex
192 int apply_layout_settings() override;
193
194protected:
195 int _init() override;
196
197 int _created(
198 const vector<string> &path,
199 const ghobject_t &oid,
200 const string &mangled_name
201 ) override;
202 int _remove(
203 const vector<string> &path,
204 const ghobject_t &oid,
205 const string &mangled_name
206 ) override;
207 int _lookup(
208 const ghobject_t &oid,
209 vector<string> *path,
210 string *mangled_name,
211 int *hardlink
212 ) override;
213
214 /**
215 * Pre-hash the collection to create folders according to the expected number
216 * of objects in this collection.
217 */
218 int _pre_hash_collection(
219 uint32_t pg_num,
220 uint64_t expected_num_objs
221 ) override;
222
223 int _collection_list_partial(
224 const ghobject_t &start,
225 const ghobject_t &end,
226 int max_count,
227 vector<ghobject_t> *ls,
228 ghobject_t *next
229 ) override;
230private:
231 /// Internal recursively remove path and its subdirs
232 int _recursive_remove(
233 const vector<string> &path, ///< [in] path to remove
234 bool top ///< [in] internal tracking of first caller
235 ); /// @return Error Code, 0 on success
236 /// Recursively remove path and its subdirs
237 int recursive_remove(
238 const vector<string> &path ///< [in] path to remove
239 ); /// @return Error Code, 0 on success
240 /// Tag root directory at beginning of col_split
241 int start_col_split(
242 const vector<string> &path ///< [in] path to split
243 ); ///< @return Error Code, 0 on success
244 /// Tag root directory at beginning of split
245 int start_split(
246 const vector<string> &path ///< [in] path to split
247 ); ///< @return Error Code, 0 on success
248 /// Tag root directory at beginning of split
249 int start_merge(
250 const vector<string> &path ///< [in] path to merge
251 ); ///< @return Error Code, 0 on success
252 /// Remove tag at end of split or merge
253 int end_split_or_merge(
254 const vector<string> &path ///< [in] path to split or merged
255 ); ///< @return Error Code, 0 on success
256 /// Gets info from the xattr on the subdir represented by path
257 int get_info(
258 const vector<string> &path, ///< [in] Path from which to read attribute.
259 subdir_info_s *info ///< [out] Attribute value
260 ); /// @return Error Code, 0 on success
261
262 /// Sets info to the xattr on the subdir represented by path
263 int set_info(
264 const vector<string> &path, ///< [in] Path on which to set attribute.
265 const subdir_info_s &info ///< [in] Value to set
266 ); /// @return Error Code, 0 on success
267
268 /// Encapsulates logic for when to split.
269 bool must_merge(
270 const subdir_info_s &info ///< [in] Info to check
271 ); /// @return True if info must be merged, False otherwise
272
273 /// Encapsulates logic for when to merge.
274 bool must_split(
275 const subdir_info_s &info ///< [in] Info to check
276 ); /// @return True if info must be split, False otherwise
277
278 /// Initiates merge
279 int initiate_merge(
280 const vector<string> &path, ///< [in] Subdir to merge
281 subdir_info_s info ///< [in] Info attached to path
282 ); /// @return Error Code, 0 on success
283
284 /// Completes merge
285 int complete_merge(
286 const vector<string> &path, ///< [in] Subdir to merge
287 subdir_info_s info ///< [in] Info attached to path
288 ); /// @return Error Code, 0 on success
289
290 /// Resets attr to match actual subdir contents
291 int reset_attr(
292 const vector<string> &path ///< [in] path to cleanup
293 );
294
295 /// Initiate Split
296 int initiate_split(
297 const vector<string> &path, ///< [in] Subdir to split
298 subdir_info_s info ///< [in] Info attached to path
299 ); /// @return Error Code, 0 on success
300
301 /// Completes Split
302 int complete_split(
303 const vector<string> &path, ///< [in] Subdir to split
304 subdir_info_s info ///< [in] Info attached to path
305 ); /// @return Error Code, 0 on success
306
307 /// Determine path components from hoid hash
308 void get_path_components(
309 const ghobject_t &oid, ///< [in] Object for which to get path components
310 vector<string> *path ///< [out] Path components for hoid.
311 );
312
313 /// Pre-hash and split folders to avoid runtime splitting
314 /// according to the given expected object number.
315 int pre_split_folder(uint32_t pg_num, uint64_t expected_num_objs);
316
317 /// Initialize the folder (dir info) with the given hash
318 /// level and number of its subdirs.
319 int init_split_folder(vector<string> &path, uint32_t hash_level);
320
321 /// do collection split for path
322 static int col_split_level(
323 HashIndex &from, ///< [in] from index
324 HashIndex &dest, ///< [in] to index
325 const vector<string> &path, ///< [in] path to split
326 uint32_t bits, ///< [in] num bits to match
327 uint32_t match, ///< [in] bits to match
328 unsigned *mkdirred ///< [in,out] path[:mkdirred] has been mkdirred
329 );
330
331
332 /**
333 * Get string representation of ghobject_t/hash
334 *
335 * e.g: 0x01234567 -> "76543210"
336 */
337 static string get_path_str(
338 const ghobject_t &oid ///< [in] Object to get hash string for
339 ); ///< @return Hash string for hoid.
340
341 /// Get string from hash, @see get_path_str
342 static string get_hash_str(
343 uint32_t hash ///< [in] Hash to convert to a string.
344 ); ///< @return String representation of hash
345
346 /// Get hash from hash prefix string e.g. "FFFFAB" -> 0xFFFFAB00
347 static uint32_t hash_prefix_to_hash(
348 string prefix ///< [in] string to convert
349 ); ///< @return Hash
350
351 /// Get hash mod from path
352 static void path_to_hobject_hash_prefix(
353 const vector<string> &path,///< [in] path to convert
354 uint32_t *bits, ///< [out] bits
355 uint32_t *hash ///< [out] hash
356 ) {
357 string hash_str;
358 for (vector<string>::const_iterator i = path.begin();
359 i != path.end();
360 ++i) {
361 hash_str.push_back(*i->begin());
362 }
363 uint32_t rev_hash = hash_prefix_to_hash(hash_str);
364 if (hash)
365 *hash = rev_hash;
366 if (bits)
367 *bits = path.size() * 4;
368 }
369
370 /// Calculate the number of bits.
371 static int calc_num_bits(uint64_t n) {
372 int ret = 0;
373 while (n > 0) {
374 n = n >> 1;
375 ret++;
376 }
377 return ret;
378 }
379
380 /// Convert a number to hex string (upper case).
381 static string to_hex(int n) {
382 assert(n >= 0 && n < 16);
383 char c = (n <= 9 ? ('0' + n) : ('A' + n - 10));
384 string str;
385 str.append(1, c);
386 return str;
387 }
388
389 struct CmpPairBitwise {
390 bool operator()(const pair<string, ghobject_t>& l,
391 const pair<string, ghobject_t>& r)
392 {
393 if (l.first < r.first)
394 return true;
395 if (l.first > r.first)
396 return false;
397 if (cmp(l.second, r.second) < 0)
398 return true;
399 return false;
400 }
401 };
402
403 struct CmpHexdigitStringBitwise {
404 bool operator()(const string& l, const string& r) {
405 return reverse_hexdigit_bits_string(l) < reverse_hexdigit_bits_string(r);
406 }
407 };
408
409 /// Get path contents by hash
410 int get_path_contents_by_hash_bitwise(
411 const vector<string> &path, /// [in] Path to list
412 const ghobject_t *next_object, /// [in] list > *next_object
413 set<string, CmpHexdigitStringBitwise> *hash_prefixes, /// [out] prefixes in dir
414 set<pair<string, ghobject_t>, CmpPairBitwise> *objects /// [out] objects
415 );
416
417 /// List objects in collection in ghobject_t order
418 int list_by_hash(
419 const vector<string> &path, /// [in] Path to list
420 const ghobject_t &end, /// [in] List only objects < end
421 int max_count, /// [in] List at most max_count
422 ghobject_t *next, /// [in,out] List objects >= *next
423 vector<ghobject_t> *out /// [out] Listed objects
424 ); ///< @return Error Code, 0 on success
425 /// List objects in collection in ghobject_t order
426 int list_by_hash_bitwise(
427 const vector<string> &path, /// [in] Path to list
428 const ghobject_t &end, /// [in] List only objects < end
429 int max_count, /// [in] List at most max_count
430 ghobject_t *next, /// [in,out] List objects >= *next
431 vector<ghobject_t> *out /// [out] Listed objects
432 ); ///< @return Error Code, 0 on success
433
434 /// Create the given levels of sub directories from the given root.
435 /// The contents of *path* is not changed after calling this function.
436 int recursive_create_path(vector<string>& path, int level);
437
438 /// split each dir below the given path
439 int split_dirs(const vector<string> &path);
224ce89b
WB
440
441 int write_settings();
7c673cae
FG
442};
443
444#endif