1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef CEPH_HASHINDEX_H
16 #define CEPH_HASHINDEX_H
18 #include "include/buffer_fwd.h"
19 #include "include/encoding.h"
22 extern string
reverse_hexdigit_bits_string(string l
);
25 * Implements collection prehashing.
39 * A file is located at the longest existing directory from the root
40 * given by the hex characters in the hash beginning with the least
43 * ex: ghobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2)
44 * would be located in (root)/2/D/0/
46 * Subdirectories are created when the number of objects in a directory
47 * exceed (abs(merge_threshhold)) * 16 * split_multiplier. The number of objects in a directory
48 * is encoded as subdir_info_s in an xattr on the directory.
50 class HashIndex
: public LFNIndex
{
52 /// Attribute name for storing subdir info @see subdir_info_s
53 static const string SUBDIR_ATTR
;
54 /// Attribute name for storing in progress op tag
55 static const string IN_PROGRESS_OP_TAG
;
56 /// Size (bits) in object hash
57 static const int PATH_HASH_LEN
= 32;
58 /// Max length of hashed path
59 static const int MAX_HASH_LEVEL
= (PATH_HASH_LEN
/4);
62 * Merges occur when the number of object drops below
63 * merge_threshold and splits occur when the number of objects
64 * exceeds 16 * abs(merge_threshold) * split_multiplier.
65 * Please note if merge_threshold is less than zero, it will never do merging
70 /// Encodes current subdir state for determining when to split/merge.
71 struct subdir_info_s
{
72 uint64_t objs
; ///< Objects in subdir.
73 uint32_t subdirs
; ///< Subdirs in subdir.
74 uint32_t hash_level
; ///< Hashlevel of subdir.
76 subdir_info_s() : objs(0), subdirs(0), hash_level(0) {}
78 void encode(bufferlist
&bl
) const
83 ::encode(subdirs
, bl
);
84 ::encode(hash_level
, bl
);
87 void decode(bufferlist::iterator
&bl
)
93 ::decode(subdirs
, bl
);
94 ::decode(hash_level
, bl
);
98 /// Encodes in progress split or merge
100 static const int SPLIT
= 0;
101 static const int MERGE
= 1;
102 static const int COL_SPLIT
= 2;
106 InProgressOp(int op
, const vector
<string
> &path
)
107 : op(op
), path(path
) {}
109 explicit InProgressOp(bufferlist::iterator
&bl
) {
113 bool is_split() const { return op
== SPLIT
; }
114 bool is_col_split() const { return op
== COL_SPLIT
; }
115 bool is_merge() const { return op
== MERGE
; }
117 void encode(bufferlist
&bl
) const {
124 void decode(bufferlist::iterator
&bl
) {
138 coll_t collection
, ///< [in] Collection
139 const char *base_path
, ///< [in] Path to the index root.
140 int merge_at
, ///< [in] Merge threshhold.
141 int split_multiple
, ///< [in] Split threshhold.
142 uint32_t index_version
,///< [in] Index version
143 double retry_probability
=0) ///< [in] retry probability
144 : LFNIndex(cct
, collection
, base_path
, index_version
, retry_probability
),
145 merge_threshold(merge_at
),
146 split_multiplier(split_multiple
) {}
148 /// @see CollectionIndex
149 uint32_t collection_version() override
{ return index_version
; }
151 /// @see CollectionIndex
152 int cleanup() override
;
154 /// @see CollectionIndex
155 int prep_delete() override
;
157 /// @see CollectionIndex
161 CollectionIndex
* dest
164 /// @see CollectionIndex
165 int apply_layout_settings() override
;
168 int _init() override
;
171 const vector
<string
> &path
,
172 const ghobject_t
&oid
,
173 const string
&mangled_name
176 const vector
<string
> &path
,
177 const ghobject_t
&oid
,
178 const string
&mangled_name
181 const ghobject_t
&oid
,
182 vector
<string
> *path
,
183 string
*mangled_name
,
188 * Pre-hash the collection to create folders according to the expected number
189 * of objects in this collection.
191 int _pre_hash_collection(
193 uint64_t expected_num_objs
196 int _collection_list_partial(
197 const ghobject_t
&start
,
198 const ghobject_t
&end
,
200 vector
<ghobject_t
> *ls
,
204 /// Internal recursively remove path and its subdirs
205 int _recursive_remove(
206 const vector
<string
> &path
, ///< [in] path to remove
207 bool top
///< [in] internal tracking of first caller
208 ); /// @return Error Code, 0 on success
209 /// Recursively remove path and its subdirs
210 int recursive_remove(
211 const vector
<string
> &path
///< [in] path to remove
212 ); /// @return Error Code, 0 on success
213 /// Tag root directory at beginning of col_split
215 const vector
<string
> &path
///< [in] path to split
216 ); ///< @return Error Code, 0 on success
217 /// Tag root directory at beginning of split
219 const vector
<string
> &path
///< [in] path to split
220 ); ///< @return Error Code, 0 on success
221 /// Tag root directory at beginning of split
223 const vector
<string
> &path
///< [in] path to merge
224 ); ///< @return Error Code, 0 on success
225 /// Remove tag at end of split or merge
226 int end_split_or_merge(
227 const vector
<string
> &path
///< [in] path to split or merged
228 ); ///< @return Error Code, 0 on success
229 /// Gets info from the xattr on the subdir represented by path
231 const vector
<string
> &path
, ///< [in] Path from which to read attribute.
232 subdir_info_s
*info
///< [out] Attribute value
233 ); /// @return Error Code, 0 on success
235 /// Sets info to the xattr on the subdir represented by path
237 const vector
<string
> &path
, ///< [in] Path on which to set attribute.
238 const subdir_info_s
&info
///< [in] Value to set
239 ); /// @return Error Code, 0 on success
241 /// Encapsulates logic for when to split.
243 const subdir_info_s
&info
///< [in] Info to check
244 ); /// @return True if info must be merged, False otherwise
246 /// Encapsulates logic for when to merge.
248 const subdir_info_s
&info
///< [in] Info to check
249 ); /// @return True if info must be split, False otherwise
253 const vector
<string
> &path
, ///< [in] Subdir to merge
254 subdir_info_s info
///< [in] Info attached to path
255 ); /// @return Error Code, 0 on success
259 const vector
<string
> &path
, ///< [in] Subdir to merge
260 subdir_info_s info
///< [in] Info attached to path
261 ); /// @return Error Code, 0 on success
263 /// Resets attr to match actual subdir contents
265 const vector
<string
> &path
///< [in] path to cleanup
270 const vector
<string
> &path
, ///< [in] Subdir to split
271 subdir_info_s info
///< [in] Info attached to path
272 ); /// @return Error Code, 0 on success
276 const vector
<string
> &path
, ///< [in] Subdir to split
277 subdir_info_s info
///< [in] Info attached to path
278 ); /// @return Error Code, 0 on success
280 /// Determine path components from hoid hash
281 void get_path_components(
282 const ghobject_t
&oid
, ///< [in] Object for which to get path components
283 vector
<string
> *path
///< [out] Path components for hoid.
286 /// Pre-hash and split folders to avoid runtime splitting
287 /// according to the given expected object number.
288 int pre_split_folder(uint32_t pg_num
, uint64_t expected_num_objs
);
290 /// Initialize the folder (dir info) with the given hash
291 /// level and number of its subdirs.
292 int init_split_folder(vector
<string
> &path
, uint32_t hash_level
);
294 /// do collection split for path
295 static int col_split_level(
296 HashIndex
&from
, ///< [in] from index
297 HashIndex
&dest
, ///< [in] to index
298 const vector
<string
> &path
, ///< [in] path to split
299 uint32_t bits
, ///< [in] num bits to match
300 uint32_t match
, ///< [in] bits to match
301 unsigned *mkdirred
///< [in,out] path[:mkdirred] has been mkdirred
306 * Get string representation of ghobject_t/hash
308 * e.g: 0x01234567 -> "76543210"
310 static string
get_path_str(
311 const ghobject_t
&oid
///< [in] Object to get hash string for
312 ); ///< @return Hash string for hoid.
314 /// Get string from hash, @see get_path_str
315 static string
get_hash_str(
316 uint32_t hash
///< [in] Hash to convert to a string.
317 ); ///< @return String representation of hash
319 /// Get hash from hash prefix string e.g. "FFFFAB" -> 0xFFFFAB00
320 static uint32_t hash_prefix_to_hash(
321 string prefix
///< [in] string to convert
324 /// Get hash mod from path
325 static void path_to_hobject_hash_prefix(
326 const vector
<string
> &path
,///< [in] path to convert
327 uint32_t *bits
, ///< [out] bits
328 uint32_t *hash
///< [out] hash
331 for (vector
<string
>::const_iterator i
= path
.begin();
334 hash_str
.push_back(*i
->begin());
336 uint32_t rev_hash
= hash_prefix_to_hash(hash_str
);
340 *bits
= path
.size() * 4;
343 /// Calculate the number of bits.
344 static int calc_num_bits(uint64_t n
) {
353 /// Convert a number to hex string (upper case).
354 static string
to_hex(int n
) {
355 assert(n
>= 0 && n
< 16);
356 char c
= (n
<= 9 ? ('0' + n
) : ('A' + n
- 10));
362 struct CmpPairBitwise
{
363 bool operator()(const pair
<string
, ghobject_t
>& l
,
364 const pair
<string
, ghobject_t
>& r
)
366 if (l
.first
< r
.first
)
368 if (l
.first
> r
.first
)
370 if (cmp(l
.second
, r
.second
) < 0)
376 struct CmpHexdigitStringBitwise
{
377 bool operator()(const string
& l
, const string
& r
) {
378 return reverse_hexdigit_bits_string(l
) < reverse_hexdigit_bits_string(r
);
382 /// Get path contents by hash
383 int get_path_contents_by_hash_bitwise(
384 const vector
<string
> &path
, /// [in] Path to list
385 const ghobject_t
*next_object
, /// [in] list > *next_object
386 set
<string
, CmpHexdigitStringBitwise
> *hash_prefixes
, /// [out] prefixes in dir
387 set
<pair
<string
, ghobject_t
>, CmpPairBitwise
> *objects
/// [out] objects
390 /// List objects in collection in ghobject_t order
392 const vector
<string
> &path
, /// [in] Path to list
393 const ghobject_t
&end
, /// [in] List only objects < end
394 int max_count
, /// [in] List at most max_count
395 ghobject_t
*next
, /// [in,out] List objects >= *next
396 vector
<ghobject_t
> *out
/// [out] Listed objects
397 ); ///< @return Error Code, 0 on success
398 /// List objects in collection in ghobject_t order
399 int list_by_hash_bitwise(
400 const vector
<string
> &path
, /// [in] Path to list
401 const ghobject_t
&end
, /// [in] List only objects < end
402 int max_count
, /// [in] List at most max_count
403 ghobject_t
*next
, /// [in,out] List objects >= *next
404 vector
<ghobject_t
> *out
/// [out] Listed objects
405 ); ///< @return Error Code, 0 on success
407 /// Create the given levels of sub directories from the given root.
408 /// The contents of *path* is not changed after calling this function.
409 int recursive_create_path(vector
<string
>& path
, int level
);
411 /// split each dir below the given path
412 int split_dirs(const vector
<string
> &path
);