1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef CEPH_HASHINDEX_H
16 #define CEPH_HASHINDEX_H
18 #include "include/buffer_fwd.h"
19 #include "include/encoding.h"
22 extern string
reverse_hexdigit_bits_string(string l
);
25 * Implements collection prehashing.
39 * A file is located at the longest existing directory from the root
40 * given by the hex characters in the hash beginning with the least
43 * ex: ghobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2)
44 * would be located in (root)/2/D/0/
46 * Subdirectories are created when the number of objects in a
47 * directory exceed 16 * (abs(merge_threshhold)) * split_multiplier +
48 * split_rand_factor). The number of objects in a directory is encoded
49 * as subdir_info_s in an xattr on the directory.
51 class HashIndex
: public LFNIndex
{
53 /// Attribute name for storing subdir info @see subdir_info_s
54 static const string SUBDIR_ATTR
;
55 /// Attribute name for storing index-wide settings
56 static const string SETTINGS_ATTR
;
57 /// Attribute name for storing in progress op tag
58 static const string IN_PROGRESS_OP_TAG
;
59 /// Size (bits) in object hash
60 static const int PATH_HASH_LEN
= 32;
61 /// Max length of hashed path
62 static const int MAX_HASH_LEVEL
= (PATH_HASH_LEN
/4);
65 * Merges occur when the number of object drops below
66 * merge_threshold and splits occur when the number of objects
69 * 16 * (abs(merge_threshold) * split_multiplier + split_rand_factor)
71 * Please note if merge_threshold is less than zero, it will never
77 /// Encodes current subdir state for determining when to split/merge.
78 struct subdir_info_s
{
79 uint64_t objs
; ///< Objects in subdir.
80 uint32_t subdirs
; ///< Subdirs in subdir.
81 uint32_t hash_level
; ///< Hashlevel of subdir.
83 subdir_info_s() : objs(0), subdirs(0), hash_level(0) {}
85 void encode(bufferlist
&bl
) const
90 ::encode(subdirs
, bl
);
91 ::encode(hash_level
, bl
);
94 void decode(bufferlist::iterator
&bl
)
100 ::decode(subdirs
, bl
);
101 ::decode(hash_level
, bl
);
106 uint32_t split_rand_factor
; ///< random factor added to split threshold (only on root of collection)
107 settings_s() : split_rand_factor(0) {}
108 void encode(bufferlist
&bl
) const
112 ::encode(split_rand_factor
, bl
);
114 void decode(bufferlist::iterator
&bl
)
118 ::decode(split_rand_factor
, bl
);
122 /// Encodes in progress split or merge
123 struct InProgressOp
{
124 static const int SPLIT
= 0;
125 static const int MERGE
= 1;
126 static const int COL_SPLIT
= 2;
130 InProgressOp(int op
, const vector
<string
> &path
)
131 : op(op
), path(path
) {}
133 explicit InProgressOp(bufferlist::iterator
&bl
) {
137 bool is_split() const { return op
== SPLIT
; }
138 bool is_col_split() const { return op
== COL_SPLIT
; }
139 bool is_merge() const { return op
== MERGE
; }
141 void encode(bufferlist
&bl
) const {
148 void decode(bufferlist::iterator
&bl
) {
162 coll_t collection
, ///< [in] Collection
163 const char *base_path
, ///< [in] Path to the index root.
164 int merge_at
, ///< [in] Merge threshhold.
165 int split_multiple
, ///< [in] Split threshhold.
166 uint32_t index_version
,///< [in] Index version
167 double retry_probability
=0) ///< [in] retry probability
168 : LFNIndex(cct
, collection
, base_path
, index_version
, retry_probability
),
169 merge_threshold(merge_at
),
170 split_multiplier(split_multiple
)
173 int read_settings() override
;
175 /// @see CollectionIndex
176 uint32_t collection_version() override
{ return index_version
; }
178 /// @see CollectionIndex
179 int cleanup() override
;
181 /// @see CollectionIndex
182 int prep_delete() override
;
184 /// @see CollectionIndex
188 CollectionIndex
* dest
191 /// @see CollectionIndex
192 int apply_layout_settings() override
;
195 int _init() override
;
198 const vector
<string
> &path
,
199 const ghobject_t
&oid
,
200 const string
&mangled_name
203 const vector
<string
> &path
,
204 const ghobject_t
&oid
,
205 const string
&mangled_name
208 const ghobject_t
&oid
,
209 vector
<string
> *path
,
210 string
*mangled_name
,
215 * Pre-hash the collection to create folders according to the expected number
216 * of objects in this collection.
218 int _pre_hash_collection(
220 uint64_t expected_num_objs
223 int _collection_list_partial(
224 const ghobject_t
&start
,
225 const ghobject_t
&end
,
227 vector
<ghobject_t
> *ls
,
231 /// Internal recursively remove path and its subdirs
232 int _recursive_remove(
233 const vector
<string
> &path
, ///< [in] path to remove
234 bool top
///< [in] internal tracking of first caller
235 ); /// @return Error Code, 0 on success
236 /// Recursively remove path and its subdirs
237 int recursive_remove(
238 const vector
<string
> &path
///< [in] path to remove
239 ); /// @return Error Code, 0 on success
240 /// Tag root directory at beginning of col_split
242 const vector
<string
> &path
///< [in] path to split
243 ); ///< @return Error Code, 0 on success
244 /// Tag root directory at beginning of split
246 const vector
<string
> &path
///< [in] path to split
247 ); ///< @return Error Code, 0 on success
248 /// Tag root directory at beginning of split
250 const vector
<string
> &path
///< [in] path to merge
251 ); ///< @return Error Code, 0 on success
252 /// Remove tag at end of split or merge
253 int end_split_or_merge(
254 const vector
<string
> &path
///< [in] path to split or merged
255 ); ///< @return Error Code, 0 on success
256 /// Gets info from the xattr on the subdir represented by path
258 const vector
<string
> &path
, ///< [in] Path from which to read attribute.
259 subdir_info_s
*info
///< [out] Attribute value
260 ); /// @return Error Code, 0 on success
262 /// Sets info to the xattr on the subdir represented by path
264 const vector
<string
> &path
, ///< [in] Path on which to set attribute.
265 const subdir_info_s
&info
///< [in] Value to set
266 ); /// @return Error Code, 0 on success
268 /// Encapsulates logic for when to split.
270 const subdir_info_s
&info
///< [in] Info to check
271 ); /// @return True if info must be merged, False otherwise
273 /// Encapsulates logic for when to merge.
275 const subdir_info_s
&info
///< [in] Info to check
276 ); /// @return True if info must be split, False otherwise
280 const vector
<string
> &path
, ///< [in] Subdir to merge
281 subdir_info_s info
///< [in] Info attached to path
282 ); /// @return Error Code, 0 on success
286 const vector
<string
> &path
, ///< [in] Subdir to merge
287 subdir_info_s info
///< [in] Info attached to path
288 ); /// @return Error Code, 0 on success
290 /// Resets attr to match actual subdir contents
292 const vector
<string
> &path
///< [in] path to cleanup
297 const vector
<string
> &path
, ///< [in] Subdir to split
298 subdir_info_s info
///< [in] Info attached to path
299 ); /// @return Error Code, 0 on success
303 const vector
<string
> &path
, ///< [in] Subdir to split
304 subdir_info_s info
///< [in] Info attached to path
305 ); /// @return Error Code, 0 on success
307 /// Determine path components from hoid hash
308 void get_path_components(
309 const ghobject_t
&oid
, ///< [in] Object for which to get path components
310 vector
<string
> *path
///< [out] Path components for hoid.
313 /// Pre-hash and split folders to avoid runtime splitting
314 /// according to the given expected object number.
315 int pre_split_folder(uint32_t pg_num
, uint64_t expected_num_objs
);
317 /// Initialize the folder (dir info) with the given hash
318 /// level and number of its subdirs.
319 int init_split_folder(vector
<string
> &path
, uint32_t hash_level
);
321 /// do collection split for path
322 static int col_split_level(
323 HashIndex
&from
, ///< [in] from index
324 HashIndex
&dest
, ///< [in] to index
325 const vector
<string
> &path
, ///< [in] path to split
326 uint32_t bits
, ///< [in] num bits to match
327 uint32_t match
, ///< [in] bits to match
328 unsigned *mkdirred
///< [in,out] path[:mkdirred] has been mkdirred
333 * Get string representation of ghobject_t/hash
335 * e.g: 0x01234567 -> "76543210"
337 static string
get_path_str(
338 const ghobject_t
&oid
///< [in] Object to get hash string for
339 ); ///< @return Hash string for hoid.
341 /// Get string from hash, @see get_path_str
342 static string
get_hash_str(
343 uint32_t hash
///< [in] Hash to convert to a string.
344 ); ///< @return String representation of hash
346 /// Get hash from hash prefix string e.g. "FFFFAB" -> 0xFFFFAB00
347 static uint32_t hash_prefix_to_hash(
348 string prefix
///< [in] string to convert
351 /// Get hash mod from path
352 static void path_to_hobject_hash_prefix(
353 const vector
<string
> &path
,///< [in] path to convert
354 uint32_t *bits
, ///< [out] bits
355 uint32_t *hash
///< [out] hash
358 for (vector
<string
>::const_iterator i
= path
.begin();
361 hash_str
.push_back(*i
->begin());
363 uint32_t rev_hash
= hash_prefix_to_hash(hash_str
);
367 *bits
= path
.size() * 4;
370 /// Calculate the number of bits.
371 static int calc_num_bits(uint64_t n
) {
380 /// Convert a number to hex string (upper case).
381 static string
to_hex(int n
) {
382 assert(n
>= 0 && n
< 16);
383 char c
= (n
<= 9 ? ('0' + n
) : ('A' + n
- 10));
389 struct CmpPairBitwise
{
390 bool operator()(const pair
<string
, ghobject_t
>& l
,
391 const pair
<string
, ghobject_t
>& r
)
393 if (l
.first
< r
.first
)
395 if (l
.first
> r
.first
)
397 if (cmp(l
.second
, r
.second
) < 0)
403 struct CmpHexdigitStringBitwise
{
404 bool operator()(const string
& l
, const string
& r
) {
405 return reverse_hexdigit_bits_string(l
) < reverse_hexdigit_bits_string(r
);
409 /// Get path contents by hash
410 int get_path_contents_by_hash_bitwise(
411 const vector
<string
> &path
, /// [in] Path to list
412 const ghobject_t
*next_object
, /// [in] list > *next_object
413 set
<string
, CmpHexdigitStringBitwise
> *hash_prefixes
, /// [out] prefixes in dir
414 set
<pair
<string
, ghobject_t
>, CmpPairBitwise
> *objects
/// [out] objects
417 /// List objects in collection in ghobject_t order
419 const vector
<string
> &path
, /// [in] Path to list
420 const ghobject_t
&end
, /// [in] List only objects < end
421 int max_count
, /// [in] List at most max_count
422 ghobject_t
*next
, /// [in,out] List objects >= *next
423 vector
<ghobject_t
> *out
/// [out] Listed objects
424 ); ///< @return Error Code, 0 on success
425 /// List objects in collection in ghobject_t order
426 int list_by_hash_bitwise(
427 const vector
<string
> &path
, /// [in] Path to list
428 const ghobject_t
&end
, /// [in] List only objects < end
429 int max_count
, /// [in] List at most max_count
430 ghobject_t
*next
, /// [in,out] List objects >= *next
431 vector
<ghobject_t
> *out
/// [out] Listed objects
432 ); ///< @return Error Code, 0 on success
434 /// Create the given levels of sub directories from the given root.
435 /// The contents of *path* is not changed after calling this function.
436 int recursive_create_path(vector
<string
>& path
, int level
);
438 /// split each dir below the given path
439 int split_dirs(const vector
<string
> &path
);
441 int write_settings();