]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/filestore/HashIndex.h
buildsys: change download over to reef release
[ceph.git] / ceph / src / os / filestore / HashIndex.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef CEPH_HASHINDEX_H
16#define CEPH_HASHINDEX_H
17
18#include "include/buffer_fwd.h"
19#include "include/encoding.h"
20#include "LFNIndex.h"
21
f67539c2 22extern std::string reverse_hexdigit_bits_string(std::string l);
7c673cae
FG
23
24/**
25 * Implements collection prehashing.
26 *
27 * @verbatim
28 * (root) - 0 - 0
29 * - 1
30 * - E
31 * - 1
32 * - 2 - D - 0
33 * .
34 * .
35 * .
36 * - F - 0
37 * @endverbatim
38 *
39 * A file is located at the longest existing directory from the root
40 * given by the hex characters in the hash beginning with the least
41 * significant.
42 *
43 * ex: ghobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2)
44 * would be located in (root)/2/D/0/
45 *
224ce89b 46 * Subdirectories are created when the number of objects in a
11fdf7f2 47 * directory exceed 16 * (abs(merge_threshhold) * split_multiplier +
224ce89b
WB
48 * split_rand_factor). The number of objects in a directory is encoded
49 * as subdir_info_s in an xattr on the directory.
7c673cae
FG
50 */
51class HashIndex : public LFNIndex {
52private:
53 /// Attribute name for storing subdir info @see subdir_info_s
f67539c2 54 static const std::string SUBDIR_ATTR;
224ce89b 55 /// Attribute name for storing index-wide settings
f67539c2 56 static const std::string SETTINGS_ATTR;
7c673cae 57 /// Attribute name for storing in progress op tag
f67539c2 58 static const std::string IN_PROGRESS_OP_TAG;
7c673cae
FG
59 /// Size (bits) in object hash
60 static const int PATH_HASH_LEN = 32;
61 /// Max length of hashed path
62 static const int MAX_HASH_LEVEL = (PATH_HASH_LEN/4);
63
64 /**
65 * Merges occur when the number of object drops below
66 * merge_threshold and splits occur when the number of objects
224ce89b
WB
67 * exceeds:
68 *
69 * 16 * (abs(merge_threshold) * split_multiplier + split_rand_factor)
70 *
71 * Please note if merge_threshold is less than zero, it will never
72 * do merging
7c673cae
FG
73 */
74 int merge_threshold;
75 int split_multiplier;
76
77 /// Encodes current subdir state for determining when to split/merge.
78 struct subdir_info_s {
79 uint64_t objs; ///< Objects in subdir.
80 uint32_t subdirs; ///< Subdirs in subdir.
81 uint32_t hash_level; ///< Hashlevel of subdir.
82
83 subdir_info_s() : objs(0), subdirs(0), hash_level(0) {}
84
f67539c2 85 void encode(ceph::buffer::list &bl) const
7c673cae 86 {
11fdf7f2 87 using ceph::encode;
7c673cae 88 __u8 v = 1;
11fdf7f2
TL
89 encode(v, bl);
90 encode(objs, bl);
91 encode(subdirs, bl);
92 encode(hash_level, bl);
7c673cae
FG
93 }
94
f67539c2 95 void decode(ceph::buffer::list::const_iterator &bl)
7c673cae 96 {
11fdf7f2 97 using ceph::decode;
7c673cae 98 __u8 v;
11fdf7f2
TL
99 decode(v, bl);
100 ceph_assert(v == 1);
101 decode(objs, bl);
102 decode(subdirs, bl);
103 decode(hash_level, bl);
7c673cae
FG
104 }
105 };
106
224ce89b
WB
107 struct settings_s {
108 uint32_t split_rand_factor; ///< random factor added to split threshold (only on root of collection)
109 settings_s() : split_rand_factor(0) {}
f67539c2 110 void encode(ceph::buffer::list &bl) const
224ce89b 111 {
11fdf7f2 112 using ceph::encode;
224ce89b 113 __u8 v = 1;
11fdf7f2
TL
114 encode(v, bl);
115 encode(split_rand_factor, bl);
224ce89b 116 }
f67539c2 117 void decode(ceph::buffer::list::const_iterator &bl)
224ce89b 118 {
11fdf7f2 119 using ceph::decode;
224ce89b 120 __u8 v;
11fdf7f2
TL
121 decode(v, bl);
122 decode(split_rand_factor, bl);
224ce89b
WB
123 }
124 } settings;
125
7c673cae
FG
126 /// Encodes in progress split or merge
127 struct InProgressOp {
128 static const int SPLIT = 0;
129 static const int MERGE = 1;
130 static const int COL_SPLIT = 2;
131 int op;
f67539c2 132 std::vector<std::string> path;
7c673cae 133
f67539c2 134 InProgressOp(int op, const std::vector<std::string> &path)
7c673cae
FG
135 : op(op), path(path) {}
136
f67539c2 137 explicit InProgressOp(ceph::buffer::list::const_iterator &bl) {
7c673cae
FG
138 decode(bl);
139 }
140
141 bool is_split() const { return op == SPLIT; }
142 bool is_col_split() const { return op == COL_SPLIT; }
143 bool is_merge() const { return op == MERGE; }
144
f67539c2 145 void encode(ceph::buffer::list &bl) const {
11fdf7f2 146 using ceph::encode;
7c673cae 147 __u8 v = 1;
11fdf7f2
TL
148 encode(v, bl);
149 encode(op, bl);
150 encode(path, bl);
7c673cae
FG
151 }
152
f67539c2 153 void decode(ceph::buffer::list::const_iterator &bl) {
11fdf7f2 154 using ceph::decode;
7c673cae 155 __u8 v;
11fdf7f2
TL
156 decode(v, bl);
157 ceph_assert(v == 1);
158 decode(op, bl);
159 decode(path, bl);
7c673cae
FG
160 }
161 };
162
163
164public:
165 /// Constructor.
166 HashIndex(
167 CephContext* cct,
168 coll_t collection, ///< [in] Collection
169 const char *base_path, ///< [in] Path to the index root.
11fdf7f2
TL
170 int merge_at, ///< [in] Merge threshold.
171 int split_multiple, ///< [in] Split threshold.
7c673cae
FG
172 uint32_t index_version,///< [in] Index version
173 double retry_probability=0) ///< [in] retry probability
174 : LFNIndex(cct, collection, base_path, index_version, retry_probability),
175 merge_threshold(merge_at),
224ce89b
WB
176 split_multiplier(split_multiple)
177 {}
178
179 int read_settings() override;
7c673cae
FG
180
181 /// @see CollectionIndex
182 uint32_t collection_version() override { return index_version; }
183
184 /// @see CollectionIndex
185 int cleanup() override;
186
187 /// @see CollectionIndex
188 int prep_delete() override;
189
190 /// @see CollectionIndex
191 int _split(
192 uint32_t match,
193 uint32_t bits,
194 CollectionIndex* dest
195 ) override;
196
11fdf7f2
TL
197 /// @see CollectionIndex
198 int _merge(
199 uint32_t bits,
200 CollectionIndex* dest
201 ) override;
202
203 int _merge_dirs(
204 HashIndex& from,
205 HashIndex& to,
f67539c2 206 const std::vector<std::string>& path);
11fdf7f2 207
7c673cae 208 /// @see CollectionIndex
1adf2230 209 int apply_layout_settings(int target_level) override;
7c673cae
FG
210
211protected:
212 int _init() override;
213
214 int _created(
f67539c2 215 const std::vector<std::string> &path,
7c673cae 216 const ghobject_t &oid,
f67539c2 217 const std::string &mangled_name
7c673cae
FG
218 ) override;
219 int _remove(
f67539c2 220 const std::vector<std::string> &path,
7c673cae 221 const ghobject_t &oid,
f67539c2 222 const std::string &mangled_name
7c673cae
FG
223 ) override;
224 int _lookup(
225 const ghobject_t &oid,
f67539c2
TL
226 std::vector<std::string> *path,
227 std::string *mangled_name,
7c673cae
FG
228 int *hardlink
229 ) override;
230
231 /**
232 * Pre-hash the collection to create folders according to the expected number
233 * of objects in this collection.
234 */
235 int _pre_hash_collection(
236 uint32_t pg_num,
237 uint64_t expected_num_objs
238 ) override;
239
240 int _collection_list_partial(
241 const ghobject_t &start,
242 const ghobject_t &end,
243 int max_count,
f67539c2 244 std::vector<ghobject_t> *ls,
7c673cae
FG
245 ghobject_t *next
246 ) override;
247private:
248 /// Internal recursively remove path and its subdirs
249 int _recursive_remove(
f67539c2 250 const std::vector<std::string> &path, ///< [in] path to remove
7c673cae
FG
251 bool top ///< [in] internal tracking of first caller
252 ); /// @return Error Code, 0 on success
253 /// Recursively remove path and its subdirs
254 int recursive_remove(
f67539c2 255 const std::vector<std::string> &path ///< [in] path to remove
7c673cae
FG
256 ); /// @return Error Code, 0 on success
257 /// Tag root directory at beginning of col_split
258 int start_col_split(
f67539c2 259 const std::vector<std::string> &path ///< [in] path to split
7c673cae
FG
260 ); ///< @return Error Code, 0 on success
261 /// Tag root directory at beginning of split
262 int start_split(
f67539c2 263 const std::vector<std::string> &path ///< [in] path to split
7c673cae
FG
264 ); ///< @return Error Code, 0 on success
265 /// Tag root directory at beginning of split
266 int start_merge(
f67539c2 267 const std::vector<std::string> &path ///< [in] path to merge
7c673cae
FG
268 ); ///< @return Error Code, 0 on success
269 /// Remove tag at end of split or merge
270 int end_split_or_merge(
f67539c2 271 const std::vector<std::string> &path ///< [in] path to split or merged
7c673cae
FG
272 ); ///< @return Error Code, 0 on success
273 /// Gets info from the xattr on the subdir represented by path
274 int get_info(
f67539c2 275 const std::vector<std::string> &path, ///< [in] Path from which to read attribute.
7c673cae
FG
276 subdir_info_s *info ///< [out] Attribute value
277 ); /// @return Error Code, 0 on success
278
279 /// Sets info to the xattr on the subdir represented by path
280 int set_info(
f67539c2 281 const std::vector<std::string> &path, ///< [in] Path on which to set attribute.
7c673cae
FG
282 const subdir_info_s &info ///< [in] Value to set
283 ); /// @return Error Code, 0 on success
284
285 /// Encapsulates logic for when to split.
286 bool must_merge(
287 const subdir_info_s &info ///< [in] Info to check
288 ); /// @return True if info must be merged, False otherwise
289
290 /// Encapsulates logic for when to merge.
291 bool must_split(
1adf2230
AA
292 const subdir_info_s &info, ///< [in] Info to check
293 int target_level = 0
7c673cae
FG
294 ); /// @return True if info must be split, False otherwise
295
296 /// Initiates merge
297 int initiate_merge(
f67539c2 298 const std::vector<std::string> &path, ///< [in] Subdir to merge
7c673cae
FG
299 subdir_info_s info ///< [in] Info attached to path
300 ); /// @return Error Code, 0 on success
301
302 /// Completes merge
303 int complete_merge(
f67539c2 304 const std::vector<std::string> &path, ///< [in] Subdir to merge
7c673cae
FG
305 subdir_info_s info ///< [in] Info attached to path
306 ); /// @return Error Code, 0 on success
307
308 /// Resets attr to match actual subdir contents
309 int reset_attr(
f67539c2 310 const std::vector<std::string> &path ///< [in] path to cleanup
7c673cae
FG
311 );
312
313 /// Initiate Split
314 int initiate_split(
f67539c2 315 const std::vector<std::string> &path, ///< [in] Subdir to split
7c673cae
FG
316 subdir_info_s info ///< [in] Info attached to path
317 ); /// @return Error Code, 0 on success
318
319 /// Completes Split
320 int complete_split(
f67539c2 321 const std::vector<std::string> &path, ///< [in] Subdir to split
7c673cae
FG
322 subdir_info_s info ///< [in] Info attached to path
323 ); /// @return Error Code, 0 on success
324
325 /// Determine path components from hoid hash
326 void get_path_components(
327 const ghobject_t &oid, ///< [in] Object for which to get path components
f67539c2 328 std::vector<std::string> *path ///< [out] Path components for hoid.
7c673cae
FG
329 );
330
331 /// Pre-hash and split folders to avoid runtime splitting
332 /// according to the given expected object number.
333 int pre_split_folder(uint32_t pg_num, uint64_t expected_num_objs);
334
335 /// Initialize the folder (dir info) with the given hash
336 /// level and number of its subdirs.
f67539c2 337 int init_split_folder(std::vector<std::string> &path, uint32_t hash_level);
7c673cae
FG
338
339 /// do collection split for path
340 static int col_split_level(
341 HashIndex &from, ///< [in] from index
342 HashIndex &dest, ///< [in] to index
f67539c2 343 const std::vector<std::string> &path, ///< [in] path to split
7c673cae
FG
344 uint32_t bits, ///< [in] num bits to match
345 uint32_t match, ///< [in] bits to match
346 unsigned *mkdirred ///< [in,out] path[:mkdirred] has been mkdirred
347 );
348
349
350 /**
f67539c2 351 * Get std::string representation of ghobject_t/hash
7c673cae
FG
352 *
353 * e.g: 0x01234567 -> "76543210"
354 */
f67539c2
TL
355 static std::string get_path_str(
356 const ghobject_t &oid ///< [in] Object to get hash std::string for
357 ); ///< @return Hash std::string for hoid.
7c673cae 358
f67539c2
TL
359 /// Get std::string from hash, @see get_path_str
360 static std::string get_hash_str(
7c673cae 361 uint32_t hash ///< [in] Hash to convert to a string.
f67539c2 362 ); ///< @return std::string representation of hash
7c673cae 363
f67539c2 364 /// Get hash from hash prefix std::string e.g. "FFFFAB" -> 0xFFFFAB00
7c673cae 365 static uint32_t hash_prefix_to_hash(
f67539c2 366 std::string prefix ///< [in] std::string to convert
7c673cae
FG
367 ); ///< @return Hash
368
369 /// Get hash mod from path
370 static void path_to_hobject_hash_prefix(
f67539c2 371 const std::vector<std::string> &path,///< [in] path to convert
7c673cae
FG
372 uint32_t *bits, ///< [out] bits
373 uint32_t *hash ///< [out] hash
374 ) {
f67539c2
TL
375 std::string hash_str;
376 for (auto i = path.begin(); i != path.end(); ++i) {
7c673cae
FG
377 hash_str.push_back(*i->begin());
378 }
379 uint32_t rev_hash = hash_prefix_to_hash(hash_str);
380 if (hash)
381 *hash = rev_hash;
382 if (bits)
383 *bits = path.size() * 4;
384 }
385
386 /// Calculate the number of bits.
387 static int calc_num_bits(uint64_t n) {
388 int ret = 0;
389 while (n > 0) {
390 n = n >> 1;
391 ret++;
392 }
393 return ret;
394 }
395
f67539c2
TL
396 /// Convert a number to hex std::string (upper case).
397 static std::string to_hex(int n) {
11fdf7f2 398 ceph_assert(n >= 0 && n < 16);
7c673cae 399 char c = (n <= 9 ? ('0' + n) : ('A' + n - 10));
f67539c2 400 std::string str;
7c673cae
FG
401 str.append(1, c);
402 return str;
403 }
404
405 struct CmpPairBitwise {
f67539c2
TL
406 bool operator()(const std::pair<std::string, ghobject_t>& l,
407 const std::pair<std::string, ghobject_t>& r) const
7c673cae
FG
408 {
409 if (l.first < r.first)
410 return true;
411 if (l.first > r.first)
412 return false;
413 if (cmp(l.second, r.second) < 0)
414 return true;
415 return false;
416 }
417 };
418
419 struct CmpHexdigitStringBitwise {
f67539c2 420 bool operator()(const std::string& l, const std::string& r) const {
7c673cae
FG
421 return reverse_hexdigit_bits_string(l) < reverse_hexdigit_bits_string(r);
422 }
423 };
424
425 /// Get path contents by hash
426 int get_path_contents_by_hash_bitwise(
f67539c2 427 const std::vector<std::string> &path, /// [in] Path to list
7c673cae 428 const ghobject_t *next_object, /// [in] list > *next_object
f67539c2
TL
429 std::set<std::string, CmpHexdigitStringBitwise> *hash_prefixes, /// [out] prefixes in dir
430 std::set<std::pair<std::string, ghobject_t>, CmpPairBitwise> *objects /// [out] objects
7c673cae
FG
431 );
432
433 /// List objects in collection in ghobject_t order
434 int list_by_hash(
f67539c2 435 const std::vector<std::string> &path, /// [in] Path to list
7c673cae
FG
436 const ghobject_t &end, /// [in] List only objects < end
437 int max_count, /// [in] List at most max_count
438 ghobject_t *next, /// [in,out] List objects >= *next
f67539c2 439 std::vector<ghobject_t> *out /// [out] Listed objects
7c673cae
FG
440 ); ///< @return Error Code, 0 on success
441 /// List objects in collection in ghobject_t order
442 int list_by_hash_bitwise(
f67539c2 443 const std::vector<std::string> &path, /// [in] Path to list
7c673cae
FG
444 const ghobject_t &end, /// [in] List only objects < end
445 int max_count, /// [in] List at most max_count
446 ghobject_t *next, /// [in,out] List objects >= *next
f67539c2 447 std::vector<ghobject_t> *out /// [out] Listed objects
7c673cae
FG
448 ); ///< @return Error Code, 0 on success
449
450 /// Create the given levels of sub directories from the given root.
451 /// The contents of *path* is not changed after calling this function.
f67539c2 452 int recursive_create_path(std::vector<std::string>& path, int level);
7c673cae
FG
453
454 /// split each dir below the given path
f67539c2 455 int split_dirs(const std::vector<std::string> &path, int target_level = 0);
224ce89b
WB
456
457 int write_settings();
7c673cae
FG
458};
459
460#endif