1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include "include/compat.h"
16 #include "include/types.h"
17 #include "include/buffer.h"
18 #include "osd/osd_types.h"
21 #include "HashIndex.h"
23 #include "common/errno.h"
24 #include "common/debug.h"
25 #define dout_context cct
26 #define dout_subsys ceph_subsys_filestore
34 using ceph::bufferptr
;
35 using ceph::bufferlist
;
37 const string
HashIndex::SUBDIR_ATTR
= "contents";
38 const string
HashIndex::SETTINGS_ATTR
= "settings";
39 const string
HashIndex::IN_PROGRESS_OP_TAG
= "in_progress_op";
41 /// hex digit to integer value
42 int hex_to_int(char c
)
44 if (c
>= '0' && c
<= '9')
46 if (c
>= 'A' && c
<= 'F')
51 /// int value to hex digit
52 char int_to_hex(int v
)
60 /// reverse bits in a nibble (0..15)
61 int reverse_nibble_bits(int in
)
71 /// reverse nibble bits in a hex digit
72 char reverse_hexdigit_bits(char c
)
74 return int_to_hex(reverse_nibble_bits(hex_to_int(c
)));
77 /// reverse nibble bits in a hex string
78 string
reverse_hexdigit_bits_string(string s
)
80 for (unsigned i
=0; i
<s
.size(); ++i
)
81 s
[i
] = reverse_hexdigit_bits(s
[i
]);
85 /// compare hex digit (as length 1 string) bitwise
86 bool cmp_hexdigit_bitwise(const string
& l
, const string
& r
)
88 ceph_assert(l
.length() == 1 && r
.length() == 1);
89 int lv
= hex_to_int(l
[0]);
90 int rv
= hex_to_int(r
[0]);
93 return reverse_nibble_bits(lv
) < reverse_nibble_bits(rv
);
96 /// compare hex digit string bitwise
97 bool cmp_hexdigit_string_bitwise(const string
& l
, const string
& r
)
99 string ll
= reverse_hexdigit_bits_string(l
);
100 string rr
= reverse_hexdigit_bits_string(r
);
104 int HashIndex::cleanup() {
106 int r
= get_attr_path(vector
<string
>(), IN_PROGRESS_OP_TAG
, bl
);
108 // No in progress operations!
111 auto i
= bl
.cbegin();
112 InProgressOp
in_progress(i
);
114 r
= get_info(in_progress
.path
, &info
);
116 return end_split_or_merge(in_progress
.path
);
121 if (in_progress
.is_split())
122 return complete_split(in_progress
.path
, info
);
123 else if (in_progress
.is_merge())
124 return complete_merge(in_progress
.path
, info
);
125 else if (in_progress
.is_col_split()) {
126 for (vector
<string
>::iterator i
= in_progress
.path
.begin();
127 i
!= in_progress
.path
.end();
129 vector
<string
> path(in_progress
.path
.begin(), i
);
130 int r
= reset_attr(path
);
140 int HashIndex::reset_attr(
141 const vector
<string
> &path
)
144 int r
= path_exists(path
, &exists
);
149 map
<string
, ghobject_t
> objects
;
150 vector
<string
> subdirs
;
151 r
= list_objects(path
, 0, 0, &objects
);
154 r
= list_subdirs(path
, &subdirs
);
159 info
.hash_level
= path
.size();
160 info
.objs
= objects
.size();
161 info
.subdirs
= subdirs
.size();
162 return set_info(path
, info
);
165 int HashIndex::col_split_level(
168 const vector
<string
> &path
,
173 /* For each subdir, move, recurse, or ignore based on comparing the low order
174 * bits of the hash represented by the subdir path with inbits, match passed
177 vector
<string
> subdirs
;
178 int r
= from
.list_subdirs(path
, &subdirs
);
181 map
<string
, ghobject_t
> objects
;
182 r
= from
.list_objects(path
, 0, 0, &objects
);
187 for (vector
<string
>::iterator i
= subdirs
.begin();
192 vector
<string
> sub_path(path
.begin(), path
.end());
193 sub_path
.push_back(*i
);
194 path_to_hobject_hash_prefix(sub_path
, &bits
, &hash
);
196 if (hobject_t::match_hash(hash
, bits
, match
)) {
206 if (*mkdirred
> path
.size())
207 *mkdirred
= path
.size();
208 } // else, skip, doesn't need to be moved or recursed into
210 if (hobject_t::match_hash(hash
, inbits
, match
)) {
213 } // else, skip, doesn't need to be moved or recursed into
216 /* Then, do the same for each object */
217 map
<string
, ghobject_t
> objs_to_move
;
218 for (map
<string
, ghobject_t
>::iterator i
= objects
.begin();
221 if (i
->second
.match(inbits
, match
)) {
222 objs_to_move
.insert(*i
);
226 if (objs_to_move
.empty() && to_move
.empty())
229 // Make parent directories as needed
230 while (*mkdirred
< path
.size()) {
233 vector
<string
> creating_path(path
.begin(), path
.begin()+*mkdirred
);
234 r
= to
.path_exists(creating_path
, &exists
);
242 info
.hash_level
= creating_path
.size();
243 if (*mkdirred
< path
.size() - 1)
245 r
= to
.start_col_split(creating_path
);
248 r
= to
.create_path(creating_path
);
251 r
= to
.set_info(creating_path
, info
);
254 r
= to
.end_split_or_merge(creating_path
);
259 subdir_info_s from_info
;
260 subdir_info_s to_info
;
261 r
= from
.get_info(path
, &from_info
);
264 r
= to
.get_info(path
, &to_info
);
268 from
.start_col_split(path
);
269 to
.start_col_split(path
);
272 for (set
<string
>::iterator i
= to_move
.begin();
277 r
= move_subdir(from
, to
, path
, *i
);
282 for (map
<string
, ghobject_t
>::iterator i
= objs_to_move
.begin();
283 i
!= objs_to_move
.end();
287 r
= move_object(from
, to
, path
, *i
);
293 r
= to
.set_info(path
, to_info
);
296 r
= from
.set_info(path
, from_info
);
299 from
.end_split_or_merge(path
);
300 to
.end_split_or_merge(path
);
304 int HashIndex::_merge(
306 CollectionIndex
* dest
) {
307 dout(20) << __func__
<< " bits " << bits
<< dendl
;
308 ceph_assert(collection_version() == dest
->collection_version());
310 vector
<string
> emptypath
;
312 // pre-split to common/target level so that any shared prefix DIR_?
313 // directories already exist at the destination. Since each
314 // directory is a nibble (4 bits),
315 unsigned shared
= bits
/ 4;
316 dout(20) << __func__
<< " pre-splitting to shared level " << shared
<< dendl
;
318 split_dirs(emptypath
, shared
);
319 ((HashIndex
*)dest
)->split_dirs(emptypath
, shared
);
322 // now merge the contents
323 _merge_dirs(*this, *(HashIndex
*)dest
, emptypath
);
328 int HashIndex::_merge_dirs(
331 const vector
<string
>& path
)
333 dout(20) << __func__
<< " path " << path
<< dendl
;
336 vector
<string
> src_subs
, dst_subs
;
337 r
= from
.list_subdirs(path
, &src_subs
);
339 lgeneric_subdout(g_ceph_context
,filestore
,20) << __func__
340 << " r " << r
<< " from "
341 << "from.list_subdirs"
345 r
= to
.list_subdirs(path
, &dst_subs
);
347 lgeneric_subdout(g_ceph_context
,filestore
,20) << __func__
348 << " r " << r
<< " from "
354 for (auto& i
: src_subs
) {
355 if (std::find(dst_subs
.begin(), dst_subs
.end(), i
) == dst_subs
.end()) {
357 r
= move_subdir(from
, to
, path
, i
);
359 lgeneric_subdout(g_ceph_context
,filestore
,20) << __func__
360 << " r " << r
<< " from "
361 << "move_subdir(...,"
362 << path
<< "," << i
<< ")"
368 vector
<string
> nested
= path
;
370 r
= _merge_dirs(from
, to
, nested
);
372 lgeneric_subdout(g_ceph_context
,filestore
,20) << __func__
373 << " r " << r
<< " from "
380 r
= remove_path(nested
);
382 lgeneric_subdout(g_ceph_context
,filestore
,20) << __func__
383 << " r " << r
<< " from "
393 map
<string
, ghobject_t
> objects
;
394 r
= from
.list_objects(path
, 0, 0, &objects
);
396 lgeneric_subdout(g_ceph_context
,filestore
,20) << __func__
397 << " r " << r
<< " from "
398 << "from.list_objects"
403 for (auto& i
: objects
) {
404 r
= move_object(from
, to
, path
, i
);
406 lgeneric_subdout(g_ceph_context
,filestore
,20) << __func__
407 << " r " << r
<< " from "
408 << "move_object(...,"
409 << path
<< "," << i
<< ")"
419 int HashIndex::_split(
422 CollectionIndex
* dest
) {
423 ceph_assert(collection_version() == dest
->collection_version());
424 unsigned mkdirred
= 0;
426 return col_split_level(
428 *static_cast<HashIndex
*>(dest
),
435 int HashIndex::split_dirs(const vector
<string
> &path
, int target_level
) {
436 dout(20) << __func__
<< " " << path
<< " target level: "
437 << target_level
<< dendl
;
439 int r
= get_info(path
, &info
);
441 dout(10) << "error looking up info for " << path
<< ": "
442 << cpp_strerror(r
) << dendl
;
446 if (must_split(info
, target_level
)) {
447 dout(1) << __func__
<< " " << path
<< " has " << info
.objs
448 << " objects, " << info
.hash_level
449 << " level, starting split in pg " << coll() << "." << dendl
;
450 r
= initiate_split(path
, info
);
452 dout(10) << "error initiating split on " << path
<< ": "
453 << cpp_strerror(r
) << dendl
;
457 r
= complete_split(path
, info
);
458 dout(1) << __func__
<< " " << path
<< " split completed in pg " << coll() << "."
461 dout(10) << "error completing split on " << path
<< ": "
462 << cpp_strerror(r
) << dendl
;
467 vector
<string
> subdirs
;
468 r
= list_subdirs(path
, &subdirs
);
470 dout(10) << "error listing subdirs of " << path
<< ": "
471 << cpp_strerror(r
) << dendl
;
474 for (vector
<string
>::const_iterator it
= subdirs
.begin();
475 it
!= subdirs
.end(); ++it
) {
476 vector
<string
> subdir_path(path
);
477 subdir_path
.push_back(*it
);
478 r
= split_dirs(subdir_path
, target_level
);
487 int HashIndex::apply_layout_settings(int target_level
) {
489 dout(10) << __func__
<< " split multiple = " << split_multiplier
490 << " merge threshold = " << merge_threshold
491 << " split rand factor = " << cct
->_conf
->filestore_split_rand_factor
492 << " target level = " << target_level
494 int r
= write_settings();
497 return split_dirs(path
, target_level
);
500 int HashIndex::_init() {
503 int r
= set_info(path
, info
);
506 return write_settings();
509 int HashIndex::write_settings() {
510 if (cct
->_conf
->filestore_split_rand_factor
> 0) {
511 settings
.split_rand_factor
= rand() % cct
->_conf
->filestore_split_rand_factor
;
513 settings
.split_rand_factor
= 0;
518 return add_attr_path(path
, SETTINGS_ATTR
, bl
);
521 int HashIndex::read_settings() {
524 int r
= get_attr_path(path
, SETTINGS_ATTR
, bl
);
528 derr
<< __func__
<< " error reading settings: " << cpp_strerror(r
) << dendl
;
531 auto it
= bl
.cbegin();
533 dout(20) << __func__
<< " split_rand_factor = " << settings
.split_rand_factor
<< dendl
;
537 /* LFNIndex virtual method implementations */
538 int HashIndex::_created(const vector
<string
> &path
,
539 const ghobject_t
&oid
,
540 const string
&mangled_name
) {
543 r
= get_info(path
, &info
);
547 r
= set_info(path
, info
);
551 if (must_split(info
)) {
552 dout(1) << __func__
<< " " << path
<< " has " << info
.objs
553 << " objects, starting split in pg " << coll() << "." << dendl
;
554 int r
= initiate_split(path
, info
);
556 derr
<< __func__
<< " error starting split " << path
<< " in pg "
557 << coll() << ": " << cpp_strerror(r
) << dendl
;
558 ceph_assert(!cct
->_conf
->filestore_fail_eio
);
560 r
= complete_split(path
, info
);
562 derr
<< __func__
<< " error completing split " << path
<< " in pg "
563 << coll() << ": " << cpp_strerror(r
) << dendl
;
564 ceph_assert(!cct
->_conf
->filestore_fail_eio
);
566 dout(1) << __func__
<< " " << path
<< " split completed in pg " << coll()
574 int HashIndex::_remove(const vector
<string
> &path
,
575 const ghobject_t
&oid
,
576 const string
&mangled_name
) {
578 r
= remove_object(path
, oid
);
582 r
= get_info(path
, &info
);
586 r
= set_info(path
, info
);
590 if (must_merge(info
)) {
591 dout(1) << __func__
<< " " << path
<< " has " << info
.objs
592 << " objects, starting merge in pg " << coll() << "." << dendl
;
593 r
= initiate_merge(path
, info
);
595 derr
<< __func__
<< " error starting merge " << path
<< " in pg "
596 << coll() << ": " << cpp_strerror(r
) << dendl
;
597 ceph_assert(!cct
->_conf
->filestore_fail_eio
);
599 r
= complete_merge(path
, info
);
601 derr
<< __func__
<< " error completing merge " << path
<< " in pg "
602 << coll() << ": " << cpp_strerror(r
) << dendl
;
603 ceph_assert(!cct
->_conf
->filestore_fail_eio
);
605 dout(1) << __func__
<< " " << path
<< " merge completed in pg " << coll()
613 int HashIndex::_lookup(const ghobject_t
&oid
,
614 vector
<string
> *path
,
615 string
*mangled_name
,
617 vector
<string
> path_comp
;
618 get_path_components(oid
, &path_comp
);
619 vector
<string
>::iterator next
= path_comp
.begin();
622 int r
= path_exists(*path
, &exists
);
631 if (next
== path_comp
.end())
633 path
->push_back(*(next
++));
635 return get_mangled_name(*path
, oid
, mangled_name
, hardlink
);
638 int HashIndex::_collection_list_partial(const ghobject_t
&start
,
639 const ghobject_t
&end
,
641 vector
<ghobject_t
> *ls
,
648 dout(20) << __func__
<< " start:" << start
<< " end:" << end
<< "-" << max_count
<< " ls.size " << ls
->size() << dendl
;
649 return list_by_hash(path
, end
, max_count
, next
, ls
);
652 int HashIndex::prep_delete() {
653 return recursive_remove(vector
<string
>());
656 int HashIndex::_pre_hash_collection(uint32_t pg_num
, uint64_t expected_num_objs
) {
659 subdir_info_s root_info
;
660 // Make sure there is neither objects nor sub-folders
661 // in this collection
662 ret
= get_info(path
, &root_info
);
666 // Do the folder splitting first
667 ret
= pre_split_folder(pg_num
, expected_num_objs
);
670 // Initialize the folder info starting from root
671 return init_split_folder(path
, 0);
674 int HashIndex::pre_split_folder(uint32_t pg_num
, uint64_t expected_num_objs
)
676 // If folder merging is enabled (by setting the threshold positive),
678 if (merge_threshold
> 0)
680 const coll_t c
= coll();
681 // Do not split if the expected number of objects in this collection is zero (by default)
682 if (expected_num_objs
== 0)
685 // Calculate the number of leaf folders (which actually store files)
686 // need to be created
687 const uint64_t objs_per_folder
= ((uint64_t)(abs(merge_threshold
)) * (uint64_t)split_multiplier
+ settings
.split_rand_factor
) * 16;
688 uint64_t leavies
= expected_num_objs
/ objs_per_folder
;
690 if (leavies
== 0 || expected_num_objs
== objs_per_folder
)
694 if (!c
.is_pg_prefix(&spgid
))
696 const ps_t ps
= spgid
.pgid
.ps();
698 // the most significant bits of pg_num
699 const int pg_num_bits
= calc_num_bits(pg_num
- 1);
701 // calculate the number of levels we only create one sub folder
702 int num
= pg_num_bits
/ 4;
703 // pg num's hex value is like 1xxx,xxxx,xxxx but not 1111,1111,1111,
704 // so that splitting starts at level 3
705 if (pg_num_bits
% 4 == 0 && pg_num
< ((uint32_t)1 << pg_num_bits
)) {
710 // Start with creation that only has one subfolder
711 vector
<string
> paths
;
714 ps_t v
= tmp_id
& 0x0000000f;
715 paths
.push_back(to_hex(v
));
716 ret
= create_path(paths
);
717 if (ret
< 0 && ret
!= -EEXIST
)
719 tmp_id
= tmp_id
>> 4;
722 // Starting from here, we can split by creating multiple subfolders
723 const int left_bits
= pg_num_bits
- dump_num
* 4;
724 // this variable denotes how many bits (for this level) that can be
725 // used for sub folder splitting
726 int split_bits
= 4 - left_bits
;
727 // the below logic is inspired by rados.h#ceph_stable_mod,
728 // it basically determines how many sub-folders should we
729 // create for splitting
730 ceph_assert(pg_num_bits
> 0); // otherwise BAD_SHIFT
731 if (((1 << (pg_num_bits
- 1)) | ps
) >= pg_num
) {
734 const uint32_t subs
= (1 << split_bits
);
735 // Calculate how many levels we create starting from here
737 int level_limit
= MAX_HASH_LEVEL
- dump_num
- 1;
738 uint64_t actual_leaves
= subs
;
739 while (actual_leaves
< leavies
&& level
< level_limit
) {
743 for (uint32_t i
= 0; i
< subs
; ++i
) {
744 ceph_assert(split_bits
<= 4); // otherwise BAD_SHIFT
745 int v
= tmp_id
| (i
<< ((4 - split_bits
) % 4));
746 paths
.push_back(to_hex(v
));
747 ret
= create_path(paths
);
748 if (ret
< 0 && ret
!= -EEXIST
)
750 ret
= recursive_create_path(paths
, level
);
758 int HashIndex::init_split_folder(vector
<string
> &path
, uint32_t hash_level
)
760 // Get the number of sub directories for the current path
761 vector
<string
> subdirs
;
762 int ret
= list_subdirs(path
, &subdirs
);
766 info
.subdirs
= subdirs
.size();
767 info
.hash_level
= hash_level
;
768 ret
= set_info(path
, info
);
771 ret
= fsync_dir(path
);
775 // Do the same for subdirs
776 vector
<string
>::const_iterator iter
;
777 for (iter
= subdirs
.begin(); iter
!= subdirs
.end(); ++iter
) {
778 path
.push_back(*iter
);
779 ret
= init_split_folder(path
, hash_level
+ 1);
787 int HashIndex::recursive_create_path(vector
<string
>& path
, int level
)
791 for (int i
= 0; i
< 16; ++i
) {
792 path
.push_back(to_hex(i
));
793 int ret
= create_path(path
);
794 if (ret
< 0 && ret
!= -EEXIST
)
796 ret
= recursive_create_path(path
, level
- 1);
804 int HashIndex::recursive_remove(const vector
<string
> &path
) {
805 return _recursive_remove(path
, true);
808 int HashIndex::_recursive_remove(const vector
<string
> &path
, bool top
) {
809 vector
<string
> subdirs
;
810 dout(20) << __func__
<< " path=" << path
<< dendl
;
811 int r
= list_subdirs(path
, &subdirs
);
814 map
<string
, ghobject_t
> objects
;
815 r
= list_objects(path
, 0, 0, &objects
);
818 if (!objects
.empty())
820 vector
<string
> subdir(path
);
821 for (vector
<string
>::iterator i
= subdirs
.begin();
824 subdir
.push_back(*i
);
825 r
= _recursive_remove(subdir
, false);
833 return remove_path(path
);
836 int HashIndex::start_col_split(const vector
<string
> &path
) {
838 InProgressOp
op_tag(InProgressOp::COL_SPLIT
, path
);
840 int r
= add_attr_path(vector
<string
>(), IN_PROGRESS_OP_TAG
, bl
);
843 return fsync_dir(vector
<string
>());
846 int HashIndex::start_split(const vector
<string
> &path
) {
848 InProgressOp
op_tag(InProgressOp::SPLIT
, path
);
850 int r
= add_attr_path(vector
<string
>(), IN_PROGRESS_OP_TAG
, bl
);
853 return fsync_dir(vector
<string
>());
856 int HashIndex::start_merge(const vector
<string
> &path
) {
858 InProgressOp
op_tag(InProgressOp::MERGE
, path
);
860 int r
= add_attr_path(vector
<string
>(), IN_PROGRESS_OP_TAG
, bl
);
863 return fsync_dir(vector
<string
>());
866 int HashIndex::end_split_or_merge(const vector
<string
> &path
) {
867 return remove_attr_path(vector
<string
>(), IN_PROGRESS_OP_TAG
);
870 int HashIndex::get_info(const vector
<string
> &path
, subdir_info_s
*info
) {
872 int r
= get_attr_path(path
, SUBDIR_ATTR
, buf
);
875 auto bufiter
= buf
.cbegin();
876 info
->decode(bufiter
);
877 ceph_assert(path
.size() == (unsigned)info
->hash_level
);
881 int HashIndex::set_info(const vector
<string
> &path
, const subdir_info_s
&info
) {
883 ceph_assert(path
.size() == (unsigned)info
.hash_level
);
885 return add_attr_path(path
, SUBDIR_ATTR
, buf
);
888 bool HashIndex::must_merge(const subdir_info_s
&info
) {
889 return (info
.hash_level
> 0 &&
890 merge_threshold
> 0 &&
891 info
.objs
< (unsigned)merge_threshold
&&
895 bool HashIndex::must_split(const subdir_info_s
&info
, int target_level
) {
896 // target_level is used for ceph-objectstore-tool to split dirs offline.
897 // if it is set (defalult is 0) and current hash level < target_level,
898 // this dir would be split no matters how many objects it has.
899 return (info
.hash_level
< (unsigned)MAX_HASH_LEVEL
&&
900 ((target_level
> 0 && info
.hash_level
< (unsigned)target_level
) ||
901 (info
.objs
> ((unsigned)(abs(merge_threshold
) * split_multiplier
+ settings
.split_rand_factor
) * 16))));
904 int HashIndex::initiate_merge(const vector
<string
> &path
, subdir_info_s info
) {
905 return start_merge(path
);
908 int HashIndex::complete_merge(const vector
<string
> &path
, subdir_info_s info
) {
909 vector
<string
> dst
= path
;
911 subdir_info_s dstinfo
;
913 r
= path_exists(path
, &exists
);
916 r
= get_info(dst
, &dstinfo
);
920 r
= move_objects(path
, dst
);
926 r
= remove_path(path
);
930 if (must_merge(dstinfo
)) {
931 r
= initiate_merge(dst
, dstinfo
);
937 return complete_merge(dst
, dstinfo
);
942 return end_split_or_merge(path
);
945 int HashIndex::initiate_split(const vector
<string
> &path
, subdir_info_s info
) {
946 return start_split(path
);
949 int HashIndex::complete_split(const vector
<string
> &path
, subdir_info_s info
) {
950 int level
= info
.hash_level
;
951 map
<string
, ghobject_t
> objects
;
952 vector
<string
> dst
= path
;
955 r
= list_objects(path
, 0, 0, &objects
);
958 vector
<string
> subdirs_vec
;
959 r
= list_subdirs(path
, &subdirs_vec
);
963 subdirs
.insert(subdirs_vec
.begin(), subdirs_vec
.end());
964 map
<string
, map
<string
, ghobject_t
> > mapped
;
965 map
<string
, ghobject_t
> moved
;
967 for (map
<string
, ghobject_t
>::iterator i
= objects
.begin();
970 vector
<string
> new_path
;
971 get_path_components(i
->second
, &new_path
);
972 mapped
[new_path
[level
]][i
->first
] = i
->second
;
974 for (map
<string
, map
<string
, ghobject_t
> >::iterator i
= mapped
.begin();
977 dst
[level
] = i
->first
;
978 /* If the info already exists, it must be correct,
979 * we may be picking up a partially finished split */
981 // subdir has already been fully copied
982 if (subdirs
.count(i
->first
) && !get_info(dst
, &temp
)) {
983 for (map
<string
, ghobject_t
>::iterator j
= i
->second
.begin();
984 j
!= i
->second
.end();
986 moved
[j
->first
] = j
->second
;
988 objects
.erase(j
->first
);
994 subdir_info_s info_new
;
995 info_new
.objs
= i
->second
.size();
996 info_new
.subdirs
= 0;
997 info_new
.hash_level
= level
+ 1;
998 if (must_merge(info_new
) && !subdirs
.count(i
->first
)) {
1003 // Subdir doesn't yet exist
1004 if (!subdirs
.count(i
->first
)) {
1006 r
= create_path(dst
);
1009 } // else subdir has been created but only partially copied
1011 for (map
<string
, ghobject_t
>::iterator j
= i
->second
.begin();
1012 j
!= i
->second
.end();
1014 moved
[j
->first
] = j
->second
;
1016 objects
.erase(j
->first
);
1017 r
= link_object(path
, dst
, j
->second
, j
->first
);
1018 // May be a partially finished split
1019 if (r
< 0 && r
!= -EEXIST
) {
1028 // Presence of info must imply that all objects have been copied
1029 r
= set_info(dst
, info_new
);
1039 r
= remove_objects(path
, moved
, &objects
);
1042 info
.objs
= objects
.size();
1043 r
= reset_attr(path
);
1046 r
= fsync_dir(path
);
1049 return end_split_or_merge(path
);
1052 void HashIndex::get_path_components(const ghobject_t
&oid
,
1053 vector
<string
> *path
) {
1054 char buf
[MAX_HASH_LEVEL
+ 1];
1055 snprintf(buf
, sizeof(buf
), "%.*X", MAX_HASH_LEVEL
, (uint32_t)oid
.hobj
.get_nibblewise_key());
1057 // Path components are the hex characters of oid.hobj.hash, least
1058 // significant first
1059 for (int i
= 0; i
< MAX_HASH_LEVEL
; ++i
) {
1060 path
->push_back(string(&buf
[i
], 1));
1064 string
HashIndex::get_hash_str(uint32_t hash
) {
1065 char buf
[MAX_HASH_LEVEL
+ 1];
1066 snprintf(buf
, sizeof(buf
), "%.*X", MAX_HASH_LEVEL
, hash
);
1068 for (int i
= 0; i
< MAX_HASH_LEVEL
; ++i
) {
1069 retval
.push_back(buf
[MAX_HASH_LEVEL
- 1 - i
]);
1074 string
HashIndex::get_path_str(const ghobject_t
&oid
) {
1075 ceph_assert(!oid
.is_max());
1076 return get_hash_str(oid
.hobj
.get_hash());
1079 uint32_t HashIndex::hash_prefix_to_hash(string prefix
) {
1080 while (prefix
.size() < sizeof(uint32_t) * 2) {
1081 prefix
.push_back('0');
1084 sscanf(prefix
.c_str(), "%x", &hash
);
1086 hash
= ((hash
& 0x0f0f0f0f) << 4) | ((hash
& 0xf0f0f0f0) >> 4);
1087 hash
= ((hash
& 0x00ff00ff) << 8) | ((hash
& 0xff00ff00) >> 8);
1088 hash
= ((hash
& 0x0000ffff) << 16) | ((hash
& 0xffff0000) >> 16);
1092 int HashIndex::get_path_contents_by_hash_bitwise(
1093 const vector
<string
> &path
,
1094 const ghobject_t
*next_object
,
1095 set
<string
, CmpHexdigitStringBitwise
> *hash_prefixes
,
1096 set
<pair
<string
, ghobject_t
>, CmpPairBitwise
> *objects
)
1098 map
<string
, ghobject_t
> rev_objects
;
1100 r
= list_objects(path
, 0, 0, &rev_objects
);
1104 for (map
<string
, ghobject_t
>::iterator i
= rev_objects
.begin();
1105 i
!= rev_objects
.end();
1107 if (next_object
&& i
->second
< *next_object
)
1109 string hash_prefix
= get_path_str(i
->second
);
1110 hash_prefixes
->insert(hash_prefix
);
1111 objects
->insert(pair
<string
, ghobject_t
>(hash_prefix
, i
->second
));
1113 vector
<string
> subdirs
;
1114 r
= list_subdirs(path
, &subdirs
);
1118 // sort subdirs bitwise (by reversing hex digit nibbles)
1119 std::sort(subdirs
.begin(), subdirs
.end(), cmp_hexdigit_bitwise
);
1121 // Local to this function, we will convert the prefix strings
1122 // (previously simply the reversed hex digits) to also have each
1123 // digit's nibbles reversed. This will make the strings sort
1126 for (vector
<string
>::const_iterator i
= path
.begin();
1129 cur_prefix
.append(reverse_hexdigit_bits_string(*i
));
1131 string next_object_string
;
1133 next_object_string
= reverse_hexdigit_bits_string(get_path_str(*next_object
));
1134 for (vector
<string
>::iterator i
= subdirs
.begin();
1137 string candidate
= cur_prefix
+ reverse_hexdigit_bits_string(*i
);
1139 if (next_object
->is_max())
1141 if (candidate
< next_object_string
.substr(0, candidate
.size()))
1144 // re-reverse the hex digit nibbles for the caller
1145 hash_prefixes
->insert(reverse_hexdigit_bits_string(candidate
));
1150 int HashIndex::list_by_hash(const vector
<string
> &path
,
1151 const ghobject_t
&end
,
1154 vector
<ghobject_t
> *out
)
1157 return list_by_hash_bitwise(path
, end
, max_count
, next
, out
);
1160 int HashIndex::list_by_hash_bitwise(
1161 const vector
<string
> &path
,
1162 const ghobject_t
& end
,
1165 vector
<ghobject_t
> *out
)
1167 vector
<string
> next_path
= path
;
1168 next_path
.push_back("");
1169 set
<string
, CmpHexdigitStringBitwise
> hash_prefixes
;
1170 set
<pair
<string
, ghobject_t
>, CmpPairBitwise
> objects
;
1171 int r
= get_path_contents_by_hash_bitwise(path
,
1177 for (set
<string
, CmpHexdigitStringBitwise
>::iterator i
= hash_prefixes
.begin();
1178 i
!= hash_prefixes
.end();
1180 dout(20) << __func__
<< " prefix " << *i
<< dendl
;
1181 set
<pair
<string
, ghobject_t
>, CmpPairBitwise
>::iterator j
= objects
.lower_bound(
1182 make_pair(*i
, ghobject_t()));
1183 if (j
== objects
.end() || j
->first
!= *i
) {
1184 *(next_path
.rbegin()) = *(i
->rbegin());
1185 ghobject_t next_recurse
;
1187 next_recurse
= *next
;
1188 r
= list_by_hash_bitwise(next_path
,
1196 if (!next_recurse
.is_max()) {
1198 *next
= next_recurse
;
1202 while (j
!= objects
.end() && j
->first
== *i
) {
1203 if (max_count
> 0 && out
->size() == (unsigned)max_count
) {
1208 if (j
->second
>= end
) {
1213 if (!next
|| j
->second
>= *next
) {
1214 dout(20) << __func__
<< " prefix " << *i
<< " ob " << j
->second
<< dendl
;
1215 out
->push_back(j
->second
);
1222 *next
= ghobject_t::get_max();