]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/filestore/HashIndex.h
f9922bcb5fa77fe0bb4e7834f405df619660b754
[ceph.git] / ceph / src / os / filestore / HashIndex.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_HASHINDEX_H
16 #define CEPH_HASHINDEX_H
17
18 #include "include/buffer_fwd.h"
19 #include "include/encoding.h"
20 #include "LFNIndex.h"
21
22 extern string reverse_hexdigit_bits_string(string l);
23
24 /**
25 * Implements collection prehashing.
26 *
27 * @verbatim
28 * (root) - 0 - 0
29 * - 1
30 * - E
31 * - 1
32 * - 2 - D - 0
33 * .
34 * .
35 * .
36 * - F - 0
37 * @endverbatim
38 *
39 * A file is located at the longest existing directory from the root
40 * given by the hex characters in the hash beginning with the least
41 * significant.
42 *
43 * ex: ghobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2)
44 * would be located in (root)/2/D/0/
45 *
46 * Subdirectories are created when the number of objects in a directory
47 * exceed (abs(merge_threshhold)) * 16 * split_multiplier. The number of objects in a directory
48 * is encoded as subdir_info_s in an xattr on the directory.
49 */
50 class HashIndex : public LFNIndex {
51 private:
52 /// Attribute name for storing subdir info @see subdir_info_s
53 static const string SUBDIR_ATTR;
54 /// Attribute name for storing in progress op tag
55 static const string IN_PROGRESS_OP_TAG;
56 /// Size (bits) in object hash
57 static const int PATH_HASH_LEN = 32;
58 /// Max length of hashed path
59 static const int MAX_HASH_LEVEL = (PATH_HASH_LEN/4);
60
61 /**
62 * Merges occur when the number of object drops below
63 * merge_threshold and splits occur when the number of objects
64 * exceeds 16 * abs(merge_threshold) * split_multiplier.
65 * Please note if merge_threshold is less than zero, it will never do merging
66 */
67 int merge_threshold;
68 int split_multiplier;
69
70 /// Encodes current subdir state for determining when to split/merge.
71 struct subdir_info_s {
72 uint64_t objs; ///< Objects in subdir.
73 uint32_t subdirs; ///< Subdirs in subdir.
74 uint32_t hash_level; ///< Hashlevel of subdir.
75
76 subdir_info_s() : objs(0), subdirs(0), hash_level(0) {}
77
78 void encode(bufferlist &bl) const
79 {
80 __u8 v = 1;
81 ::encode(v, bl);
82 ::encode(objs, bl);
83 ::encode(subdirs, bl);
84 ::encode(hash_level, bl);
85 }
86
87 void decode(bufferlist::iterator &bl)
88 {
89 __u8 v;
90 ::decode(v, bl);
91 assert(v == 1);
92 ::decode(objs, bl);
93 ::decode(subdirs, bl);
94 ::decode(hash_level, bl);
95 }
96 };
97
98 /// Encodes in progress split or merge
99 struct InProgressOp {
100 static const int SPLIT = 0;
101 static const int MERGE = 1;
102 static const int COL_SPLIT = 2;
103 int op;
104 vector<string> path;
105
106 InProgressOp(int op, const vector<string> &path)
107 : op(op), path(path) {}
108
109 explicit InProgressOp(bufferlist::iterator &bl) {
110 decode(bl);
111 }
112
113 bool is_split() const { return op == SPLIT; }
114 bool is_col_split() const { return op == COL_SPLIT; }
115 bool is_merge() const { return op == MERGE; }
116
117 void encode(bufferlist &bl) const {
118 __u8 v = 1;
119 ::encode(v, bl);
120 ::encode(op, bl);
121 ::encode(path, bl);
122 }
123
124 void decode(bufferlist::iterator &bl) {
125 __u8 v;
126 ::decode(v, bl);
127 assert(v == 1);
128 ::decode(op, bl);
129 ::decode(path, bl);
130 }
131 };
132
133
134 public:
135 /// Constructor.
136 HashIndex(
137 CephContext* cct,
138 coll_t collection, ///< [in] Collection
139 const char *base_path, ///< [in] Path to the index root.
140 int merge_at, ///< [in] Merge threshhold.
141 int split_multiple, ///< [in] Split threshhold.
142 uint32_t index_version,///< [in] Index version
143 double retry_probability=0) ///< [in] retry probability
144 : LFNIndex(cct, collection, base_path, index_version, retry_probability),
145 merge_threshold(merge_at),
146 split_multiplier(split_multiple) {}
147
148 /// @see CollectionIndex
149 uint32_t collection_version() override { return index_version; }
150
151 /// @see CollectionIndex
152 int cleanup() override;
153
154 /// @see CollectionIndex
155 int prep_delete() override;
156
157 /// @see CollectionIndex
158 int _split(
159 uint32_t match,
160 uint32_t bits,
161 CollectionIndex* dest
162 ) override;
163
164 /// @see CollectionIndex
165 int apply_layout_settings() override;
166
167 protected:
168 int _init() override;
169
170 int _created(
171 const vector<string> &path,
172 const ghobject_t &oid,
173 const string &mangled_name
174 ) override;
175 int _remove(
176 const vector<string> &path,
177 const ghobject_t &oid,
178 const string &mangled_name
179 ) override;
180 int _lookup(
181 const ghobject_t &oid,
182 vector<string> *path,
183 string *mangled_name,
184 int *hardlink
185 ) override;
186
187 /**
188 * Pre-hash the collection to create folders according to the expected number
189 * of objects in this collection.
190 */
191 int _pre_hash_collection(
192 uint32_t pg_num,
193 uint64_t expected_num_objs
194 ) override;
195
196 int _collection_list_partial(
197 const ghobject_t &start,
198 const ghobject_t &end,
199 int max_count,
200 vector<ghobject_t> *ls,
201 ghobject_t *next
202 ) override;
203 private:
204 /// Internal recursively remove path and its subdirs
205 int _recursive_remove(
206 const vector<string> &path, ///< [in] path to remove
207 bool top ///< [in] internal tracking of first caller
208 ); /// @return Error Code, 0 on success
209 /// Recursively remove path and its subdirs
210 int recursive_remove(
211 const vector<string> &path ///< [in] path to remove
212 ); /// @return Error Code, 0 on success
213 /// Tag root directory at beginning of col_split
214 int start_col_split(
215 const vector<string> &path ///< [in] path to split
216 ); ///< @return Error Code, 0 on success
217 /// Tag root directory at beginning of split
218 int start_split(
219 const vector<string> &path ///< [in] path to split
220 ); ///< @return Error Code, 0 on success
221 /// Tag root directory at beginning of split
222 int start_merge(
223 const vector<string> &path ///< [in] path to merge
224 ); ///< @return Error Code, 0 on success
225 /// Remove tag at end of split or merge
226 int end_split_or_merge(
227 const vector<string> &path ///< [in] path to split or merged
228 ); ///< @return Error Code, 0 on success
229 /// Gets info from the xattr on the subdir represented by path
230 int get_info(
231 const vector<string> &path, ///< [in] Path from which to read attribute.
232 subdir_info_s *info ///< [out] Attribute value
233 ); /// @return Error Code, 0 on success
234
235 /// Sets info to the xattr on the subdir represented by path
236 int set_info(
237 const vector<string> &path, ///< [in] Path on which to set attribute.
238 const subdir_info_s &info ///< [in] Value to set
239 ); /// @return Error Code, 0 on success
240
241 /// Encapsulates logic for when to split.
242 bool must_merge(
243 const subdir_info_s &info ///< [in] Info to check
244 ); /// @return True if info must be merged, False otherwise
245
246 /// Encapsulates logic for when to merge.
247 bool must_split(
248 const subdir_info_s &info ///< [in] Info to check
249 ); /// @return True if info must be split, False otherwise
250
251 /// Initiates merge
252 int initiate_merge(
253 const vector<string> &path, ///< [in] Subdir to merge
254 subdir_info_s info ///< [in] Info attached to path
255 ); /// @return Error Code, 0 on success
256
257 /// Completes merge
258 int complete_merge(
259 const vector<string> &path, ///< [in] Subdir to merge
260 subdir_info_s info ///< [in] Info attached to path
261 ); /// @return Error Code, 0 on success
262
263 /// Resets attr to match actual subdir contents
264 int reset_attr(
265 const vector<string> &path ///< [in] path to cleanup
266 );
267
268 /// Initiate Split
269 int initiate_split(
270 const vector<string> &path, ///< [in] Subdir to split
271 subdir_info_s info ///< [in] Info attached to path
272 ); /// @return Error Code, 0 on success
273
274 /// Completes Split
275 int complete_split(
276 const vector<string> &path, ///< [in] Subdir to split
277 subdir_info_s info ///< [in] Info attached to path
278 ); /// @return Error Code, 0 on success
279
280 /// Determine path components from hoid hash
281 void get_path_components(
282 const ghobject_t &oid, ///< [in] Object for which to get path components
283 vector<string> *path ///< [out] Path components for hoid.
284 );
285
286 /// Pre-hash and split folders to avoid runtime splitting
287 /// according to the given expected object number.
288 int pre_split_folder(uint32_t pg_num, uint64_t expected_num_objs);
289
290 /// Initialize the folder (dir info) with the given hash
291 /// level and number of its subdirs.
292 int init_split_folder(vector<string> &path, uint32_t hash_level);
293
294 /// do collection split for path
295 static int col_split_level(
296 HashIndex &from, ///< [in] from index
297 HashIndex &dest, ///< [in] to index
298 const vector<string> &path, ///< [in] path to split
299 uint32_t bits, ///< [in] num bits to match
300 uint32_t match, ///< [in] bits to match
301 unsigned *mkdirred ///< [in,out] path[:mkdirred] has been mkdirred
302 );
303
304
305 /**
306 * Get string representation of ghobject_t/hash
307 *
308 * e.g: 0x01234567 -> "76543210"
309 */
310 static string get_path_str(
311 const ghobject_t &oid ///< [in] Object to get hash string for
312 ); ///< @return Hash string for hoid.
313
314 /// Get string from hash, @see get_path_str
315 static string get_hash_str(
316 uint32_t hash ///< [in] Hash to convert to a string.
317 ); ///< @return String representation of hash
318
319 /// Get hash from hash prefix string e.g. "FFFFAB" -> 0xFFFFAB00
320 static uint32_t hash_prefix_to_hash(
321 string prefix ///< [in] string to convert
322 ); ///< @return Hash
323
324 /// Get hash mod from path
325 static void path_to_hobject_hash_prefix(
326 const vector<string> &path,///< [in] path to convert
327 uint32_t *bits, ///< [out] bits
328 uint32_t *hash ///< [out] hash
329 ) {
330 string hash_str;
331 for (vector<string>::const_iterator i = path.begin();
332 i != path.end();
333 ++i) {
334 hash_str.push_back(*i->begin());
335 }
336 uint32_t rev_hash = hash_prefix_to_hash(hash_str);
337 if (hash)
338 *hash = rev_hash;
339 if (bits)
340 *bits = path.size() * 4;
341 }
342
343 /// Calculate the number of bits.
344 static int calc_num_bits(uint64_t n) {
345 int ret = 0;
346 while (n > 0) {
347 n = n >> 1;
348 ret++;
349 }
350 return ret;
351 }
352
353 /// Convert a number to hex string (upper case).
354 static string to_hex(int n) {
355 assert(n >= 0 && n < 16);
356 char c = (n <= 9 ? ('0' + n) : ('A' + n - 10));
357 string str;
358 str.append(1, c);
359 return str;
360 }
361
362 struct CmpPairBitwise {
363 bool operator()(const pair<string, ghobject_t>& l,
364 const pair<string, ghobject_t>& r)
365 {
366 if (l.first < r.first)
367 return true;
368 if (l.first > r.first)
369 return false;
370 if (cmp(l.second, r.second) < 0)
371 return true;
372 return false;
373 }
374 };
375
376 struct CmpHexdigitStringBitwise {
377 bool operator()(const string& l, const string& r) {
378 return reverse_hexdigit_bits_string(l) < reverse_hexdigit_bits_string(r);
379 }
380 };
381
382 /// Get path contents by hash
383 int get_path_contents_by_hash_bitwise(
384 const vector<string> &path, /// [in] Path to list
385 const ghobject_t *next_object, /// [in] list > *next_object
386 set<string, CmpHexdigitStringBitwise> *hash_prefixes, /// [out] prefixes in dir
387 set<pair<string, ghobject_t>, CmpPairBitwise> *objects /// [out] objects
388 );
389
390 /// List objects in collection in ghobject_t order
391 int list_by_hash(
392 const vector<string> &path, /// [in] Path to list
393 const ghobject_t &end, /// [in] List only objects < end
394 int max_count, /// [in] List at most max_count
395 ghobject_t *next, /// [in,out] List objects >= *next
396 vector<ghobject_t> *out /// [out] Listed objects
397 ); ///< @return Error Code, 0 on success
398 /// List objects in collection in ghobject_t order
399 int list_by_hash_bitwise(
400 const vector<string> &path, /// [in] Path to list
401 const ghobject_t &end, /// [in] List only objects < end
402 int max_count, /// [in] List at most max_count
403 ghobject_t *next, /// [in,out] List objects >= *next
404 vector<ghobject_t> *out /// [out] Listed objects
405 ); ///< @return Error Code, 0 on success
406
407 /// Create the given levels of sub directories from the given root.
408 /// The contents of *path* is not changed after calling this function.
409 int recursive_create_path(vector<string>& path, int level);
410
411 /// split each dir below the given path
412 int split_dirs(const vector<string> &path);
413 };
414
415 #endif