1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef CEPH_HASHINDEX_H
16 #define CEPH_HASHINDEX_H
18 #include "include/buffer_fwd.h"
19 #include "include/encoding.h"
22 extern string
reverse_hexdigit_bits_string(string l
);
25 * Implements collection prehashing.
39 * A file is located at the longest existing directory from the root
40 * given by the hex characters in the hash beginning with the least
43 * ex: ghobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2)
44 * would be located in (root)/2/D/0/
46 * Subdirectories are created when the number of objects in a
47 * directory exceed 16 * (abs(merge_threshhold) * split_multiplier +
48 * split_rand_factor). The number of objects in a directory is encoded
49 * as subdir_info_s in an xattr on the directory.
51 class HashIndex
: public LFNIndex
{
53 /// Attribute name for storing subdir info @see subdir_info_s
54 static const string SUBDIR_ATTR
;
55 /// Attribute name for storing index-wide settings
56 static const string SETTINGS_ATTR
;
57 /// Attribute name for storing in progress op tag
58 static const string IN_PROGRESS_OP_TAG
;
59 /// Size (bits) in object hash
60 static const int PATH_HASH_LEN
= 32;
61 /// Max length of hashed path
62 static const int MAX_HASH_LEVEL
= (PATH_HASH_LEN
/4);
65 * Merges occur when the number of object drops below
66 * merge_threshold and splits occur when the number of objects
69 * 16 * (abs(merge_threshold) * split_multiplier + split_rand_factor)
71 * Please note if merge_threshold is less than zero, it will never
77 /// Encodes current subdir state for determining when to split/merge.
78 struct subdir_info_s
{
79 uint64_t objs
; ///< Objects in subdir.
80 uint32_t subdirs
; ///< Subdirs in subdir.
81 uint32_t hash_level
; ///< Hashlevel of subdir.
83 subdir_info_s() : objs(0), subdirs(0), hash_level(0) {}
85 void encode(bufferlist
&bl
) const
92 encode(hash_level
, bl
);
95 void decode(bufferlist::const_iterator
&bl
)
103 decode(hash_level
, bl
);
108 uint32_t split_rand_factor
; ///< random factor added to split threshold (only on root of collection)
109 settings_s() : split_rand_factor(0) {}
110 void encode(bufferlist
&bl
) const
115 encode(split_rand_factor
, bl
);
117 void decode(bufferlist::const_iterator
&bl
)
122 decode(split_rand_factor
, bl
);
126 /// Encodes in progress split or merge
127 struct InProgressOp
{
128 static const int SPLIT
= 0;
129 static const int MERGE
= 1;
130 static const int COL_SPLIT
= 2;
134 InProgressOp(int op
, const vector
<string
> &path
)
135 : op(op
), path(path
) {}
137 explicit InProgressOp(bufferlist::const_iterator
&bl
) {
141 bool is_split() const { return op
== SPLIT
; }
142 bool is_col_split() const { return op
== COL_SPLIT
; }
143 bool is_merge() const { return op
== MERGE
; }
145 void encode(bufferlist
&bl
) const {
153 void decode(bufferlist::const_iterator
&bl
) {
168 coll_t collection
, ///< [in] Collection
169 const char *base_path
, ///< [in] Path to the index root.
170 int merge_at
, ///< [in] Merge threshold.
171 int split_multiple
, ///< [in] Split threshold.
172 uint32_t index_version
,///< [in] Index version
173 double retry_probability
=0) ///< [in] retry probability
174 : LFNIndex(cct
, collection
, base_path
, index_version
, retry_probability
),
175 merge_threshold(merge_at
),
176 split_multiplier(split_multiple
)
179 int read_settings() override
;
181 /// @see CollectionIndex
182 uint32_t collection_version() override
{ return index_version
; }
184 /// @see CollectionIndex
185 int cleanup() override
;
187 /// @see CollectionIndex
188 int prep_delete() override
;
190 /// @see CollectionIndex
194 CollectionIndex
* dest
197 /// @see CollectionIndex
200 CollectionIndex
* dest
206 const vector
<string
>& path
);
208 /// @see CollectionIndex
209 int apply_layout_settings(int target_level
) override
;
212 int _init() override
;
215 const vector
<string
> &path
,
216 const ghobject_t
&oid
,
217 const string
&mangled_name
220 const vector
<string
> &path
,
221 const ghobject_t
&oid
,
222 const string
&mangled_name
225 const ghobject_t
&oid
,
226 vector
<string
> *path
,
227 string
*mangled_name
,
232 * Pre-hash the collection to create folders according to the expected number
233 * of objects in this collection.
235 int _pre_hash_collection(
237 uint64_t expected_num_objs
240 int _collection_list_partial(
241 const ghobject_t
&start
,
242 const ghobject_t
&end
,
244 vector
<ghobject_t
> *ls
,
248 /// Internal recursively remove path and its subdirs
249 int _recursive_remove(
250 const vector
<string
> &path
, ///< [in] path to remove
251 bool top
///< [in] internal tracking of first caller
252 ); /// @return Error Code, 0 on success
253 /// Recursively remove path and its subdirs
254 int recursive_remove(
255 const vector
<string
> &path
///< [in] path to remove
256 ); /// @return Error Code, 0 on success
257 /// Tag root directory at beginning of col_split
259 const vector
<string
> &path
///< [in] path to split
260 ); ///< @return Error Code, 0 on success
261 /// Tag root directory at beginning of split
263 const vector
<string
> &path
///< [in] path to split
264 ); ///< @return Error Code, 0 on success
265 /// Tag root directory at beginning of split
267 const vector
<string
> &path
///< [in] path to merge
268 ); ///< @return Error Code, 0 on success
269 /// Remove tag at end of split or merge
270 int end_split_or_merge(
271 const vector
<string
> &path
///< [in] path to split or merged
272 ); ///< @return Error Code, 0 on success
273 /// Gets info from the xattr on the subdir represented by path
275 const vector
<string
> &path
, ///< [in] Path from which to read attribute.
276 subdir_info_s
*info
///< [out] Attribute value
277 ); /// @return Error Code, 0 on success
279 /// Sets info to the xattr on the subdir represented by path
281 const vector
<string
> &path
, ///< [in] Path on which to set attribute.
282 const subdir_info_s
&info
///< [in] Value to set
283 ); /// @return Error Code, 0 on success
285 /// Encapsulates logic for when to split.
287 const subdir_info_s
&info
///< [in] Info to check
288 ); /// @return True if info must be merged, False otherwise
290 /// Encapsulates logic for when to merge.
292 const subdir_info_s
&info
, ///< [in] Info to check
294 ); /// @return True if info must be split, False otherwise
298 const vector
<string
> &path
, ///< [in] Subdir to merge
299 subdir_info_s info
///< [in] Info attached to path
300 ); /// @return Error Code, 0 on success
304 const vector
<string
> &path
, ///< [in] Subdir to merge
305 subdir_info_s info
///< [in] Info attached to path
306 ); /// @return Error Code, 0 on success
308 /// Resets attr to match actual subdir contents
310 const vector
<string
> &path
///< [in] path to cleanup
315 const vector
<string
> &path
, ///< [in] Subdir to split
316 subdir_info_s info
///< [in] Info attached to path
317 ); /// @return Error Code, 0 on success
321 const vector
<string
> &path
, ///< [in] Subdir to split
322 subdir_info_s info
///< [in] Info attached to path
323 ); /// @return Error Code, 0 on success
325 /// Determine path components from hoid hash
326 void get_path_components(
327 const ghobject_t
&oid
, ///< [in] Object for which to get path components
328 vector
<string
> *path
///< [out] Path components for hoid.
331 /// Pre-hash and split folders to avoid runtime splitting
332 /// according to the given expected object number.
333 int pre_split_folder(uint32_t pg_num
, uint64_t expected_num_objs
);
335 /// Initialize the folder (dir info) with the given hash
336 /// level and number of its subdirs.
337 int init_split_folder(vector
<string
> &path
, uint32_t hash_level
);
339 /// do collection split for path
340 static int col_split_level(
341 HashIndex
&from
, ///< [in] from index
342 HashIndex
&dest
, ///< [in] to index
343 const vector
<string
> &path
, ///< [in] path to split
344 uint32_t bits
, ///< [in] num bits to match
345 uint32_t match
, ///< [in] bits to match
346 unsigned *mkdirred
///< [in,out] path[:mkdirred] has been mkdirred
351 * Get string representation of ghobject_t/hash
353 * e.g: 0x01234567 -> "76543210"
355 static string
get_path_str(
356 const ghobject_t
&oid
///< [in] Object to get hash string for
357 ); ///< @return Hash string for hoid.
359 /// Get string from hash, @see get_path_str
360 static string
get_hash_str(
361 uint32_t hash
///< [in] Hash to convert to a string.
362 ); ///< @return String representation of hash
364 /// Get hash from hash prefix string e.g. "FFFFAB" -> 0xFFFFAB00
365 static uint32_t hash_prefix_to_hash(
366 string prefix
///< [in] string to convert
369 /// Get hash mod from path
370 static void path_to_hobject_hash_prefix(
371 const vector
<string
> &path
,///< [in] path to convert
372 uint32_t *bits
, ///< [out] bits
373 uint32_t *hash
///< [out] hash
376 for (vector
<string
>::const_iterator i
= path
.begin();
379 hash_str
.push_back(*i
->begin());
381 uint32_t rev_hash
= hash_prefix_to_hash(hash_str
);
385 *bits
= path
.size() * 4;
388 /// Calculate the number of bits.
389 static int calc_num_bits(uint64_t n
) {
398 /// Convert a number to hex string (upper case).
399 static string
to_hex(int n
) {
400 ceph_assert(n
>= 0 && n
< 16);
401 char c
= (n
<= 9 ? ('0' + n
) : ('A' + n
- 10));
407 struct CmpPairBitwise
{
408 bool operator()(const pair
<string
, ghobject_t
>& l
,
409 const pair
<string
, ghobject_t
>& r
) const
411 if (l
.first
< r
.first
)
413 if (l
.first
> r
.first
)
415 if (cmp(l
.second
, r
.second
) < 0)
421 struct CmpHexdigitStringBitwise
{
422 bool operator()(const string
& l
, const string
& r
) const {
423 return reverse_hexdigit_bits_string(l
) < reverse_hexdigit_bits_string(r
);
427 /// Get path contents by hash
428 int get_path_contents_by_hash_bitwise(
429 const vector
<string
> &path
, /// [in] Path to list
430 const ghobject_t
*next_object
, /// [in] list > *next_object
431 set
<string
, CmpHexdigitStringBitwise
> *hash_prefixes
, /// [out] prefixes in dir
432 set
<pair
<string
, ghobject_t
>, CmpPairBitwise
> *objects
/// [out] objects
435 /// List objects in collection in ghobject_t order
437 const vector
<string
> &path
, /// [in] Path to list
438 const ghobject_t
&end
, /// [in] List only objects < end
439 int max_count
, /// [in] List at most max_count
440 ghobject_t
*next
, /// [in,out] List objects >= *next
441 vector
<ghobject_t
> *out
/// [out] Listed objects
442 ); ///< @return Error Code, 0 on success
443 /// List objects in collection in ghobject_t order
444 int list_by_hash_bitwise(
445 const vector
<string
> &path
, /// [in] Path to list
446 const ghobject_t
&end
, /// [in] List only objects < end
447 int max_count
, /// [in] List at most max_count
448 ghobject_t
*next
, /// [in,out] List objects >= *next
449 vector
<ghobject_t
> *out
/// [out] Listed objects
450 ); ///< @return Error Code, 0 on success
452 /// Create the given levels of sub directories from the given root.
453 /// The contents of *path* is not changed after calling this function.
454 int recursive_create_path(vector
<string
>& path
, int level
);
456 /// split each dir below the given path
457 int split_dirs(const vector
<string
> &path
, int target_level
= 0);
459 int write_settings();