1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "osd/osd_types.h"
5 #include "common/debug.h"
6 #include "common/Formatter.h"
7 #include "common/errno.h"
8 #include "common/TextTable.h"
9 #include "include/stringify.h"
11 #include "CrushWrapper.h"
12 #include "CrushTreeDumper.h"
14 #define dout_subsys ceph_subsys_crush
21 using std::ostringstream
;
27 using ceph::bufferlist
;
29 using ceph::decode_nohead
;
31 using ceph::Formatter
;
33 bool CrushWrapper::has_legacy_rule_ids() const
35 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
36 crush_rule
*r
= crush
->rules
[i
];
38 r
->mask
.ruleset
!= i
) {
45 std::map
<int, int> CrushWrapper::renumber_rules()
47 std::map
<int, int> result
;
48 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
49 crush_rule
*r
= crush
->rules
[i
];
50 if (r
&& r
->mask
.ruleset
!= i
) {
51 result
[r
->mask
.ruleset
] = i
;
58 bool CrushWrapper::has_non_straw2_buckets() const
60 for (int i
=0; i
<crush
->max_buckets
; ++i
) {
61 crush_bucket
*b
= crush
->buckets
[i
];
64 if (b
->alg
!= CRUSH_BUCKET_STRAW2
)
70 bool CrushWrapper::has_v2_rules() const
72 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
80 bool CrushWrapper::is_v2_rule(unsigned ruleid
) const
82 // check rule for use of indep or new SET_* rule steps
83 if (ruleid
>= crush
->max_rules
)
85 crush_rule
*r
= crush
->rules
[ruleid
];
88 for (unsigned j
=0; j
<r
->len
; j
++) {
89 if (r
->steps
[j
].op
== CRUSH_RULE_CHOOSE_INDEP
||
90 r
->steps
[j
].op
== CRUSH_RULE_CHOOSELEAF_INDEP
||
91 r
->steps
[j
].op
== CRUSH_RULE_SET_CHOOSE_TRIES
||
92 r
->steps
[j
].op
== CRUSH_RULE_SET_CHOOSELEAF_TRIES
) {
99 bool CrushWrapper::has_v3_rules() const
101 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
109 bool CrushWrapper::is_v3_rule(unsigned ruleid
) const
111 // check rule for use of SET_CHOOSELEAF_VARY_R step
112 if (ruleid
>= crush
->max_rules
)
114 crush_rule
*r
= crush
->rules
[ruleid
];
117 for (unsigned j
=0; j
<r
->len
; j
++) {
118 if (r
->steps
[j
].op
== CRUSH_RULE_SET_CHOOSELEAF_VARY_R
) {
125 bool CrushWrapper::has_v4_buckets() const
127 for (int i
=0; i
<crush
->max_buckets
; ++i
) {
128 crush_bucket
*b
= crush
->buckets
[i
];
131 if (b
->alg
== CRUSH_BUCKET_STRAW2
)
137 bool CrushWrapper::has_v5_rules() const
139 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
147 bool CrushWrapper::is_v5_rule(unsigned ruleid
) const
149 // check rule for use of SET_CHOOSELEAF_STABLE step
150 if (ruleid
>= crush
->max_rules
)
152 crush_rule
*r
= crush
->rules
[ruleid
];
155 for (unsigned j
=0; j
<r
->len
; j
++) {
156 if (r
->steps
[j
].op
== CRUSH_RULE_SET_CHOOSELEAF_STABLE
) {
163 bool CrushWrapper::has_choose_args() const
165 return !choose_args
.empty();
168 bool CrushWrapper::has_incompat_choose_args() const
170 if (choose_args
.empty())
172 if (choose_args
.size() > 1)
174 if (choose_args
.begin()->first
!= DEFAULT_CHOOSE_ARGS
)
176 crush_choose_arg_map arg_map
= choose_args
.begin()->second
;
177 for (__u32 i
= 0; i
< arg_map
.size
; i
++) {
178 crush_choose_arg
*arg
= &arg_map
.args
[i
];
179 if (arg
->weight_set_positions
== 0 &&
182 if (arg
->weight_set_positions
!= 1)
184 if (arg
->ids_size
!= 0)
190 int CrushWrapper::split_id_class(int i
, int *idout
, int *classout
) const
194 string name
= get_item_name(i
);
195 size_t pos
= name
.find("~");
196 if (pos
== string::npos
) {
201 string name_no_class
= name
.substr(0, pos
);
202 if (!name_exists(name_no_class
))
204 string class_name
= name
.substr(pos
+ 1);
205 if (!class_exists(class_name
))
207 *idout
= get_item_id(name_no_class
);
208 *classout
= get_class_id(class_name
);
212 int CrushWrapper::can_rename_item(const string
& srcname
,
213 const string
& dstname
,
216 if (name_exists(srcname
)) {
217 if (name_exists(dstname
)) {
218 *ss
<< "dstname = '" << dstname
<< "' already exists";
221 if (is_valid_crush_name(dstname
)) {
224 *ss
<< "dstname = '" << dstname
<< "' does not match [-_.0-9a-zA-Z]+";
228 if (name_exists(dstname
)) {
229 *ss
<< "srcname = '" << srcname
<< "' does not exist "
230 << "and dstname = '" << dstname
<< "' already exists";
233 *ss
<< "srcname = '" << srcname
<< "' does not exist";
239 int CrushWrapper::rename_item(const string
& srcname
,
240 const string
& dstname
,
243 int ret
= can_rename_item(srcname
, dstname
, ss
);
246 int oldid
= get_item_id(srcname
);
247 return set_item_name(oldid
, dstname
);
250 int CrushWrapper::can_rename_bucket(const string
& srcname
,
251 const string
& dstname
,
254 int ret
= can_rename_item(srcname
, dstname
, ss
);
257 int srcid
= get_item_id(srcname
);
259 *ss
<< "srcname = '" << srcname
<< "' is not a bucket "
260 << "because its id = " << srcid
<< " is >= 0";
266 int CrushWrapper::rename_bucket(const string
& srcname
,
267 const string
& dstname
,
270 int ret
= can_rename_bucket(srcname
, dstname
, ss
);
273 int oldid
= get_item_id(srcname
);
274 return set_item_name(oldid
, dstname
);
277 int CrushWrapper::rename_rule(const string
& srcname
,
278 const string
& dstname
,
281 if (!rule_exists(srcname
)) {
283 *ss
<< "source rule name '" << srcname
<< "' does not exist";
287 if (rule_exists(dstname
)) {
289 *ss
<< "destination rule name '" << dstname
<< "' already exists";
293 int rule_id
= get_rule_id(srcname
);
294 auto it
= rule_name_map
.find(rule_id
);
295 ceph_assert(it
!= rule_name_map
.end());
296 it
->second
= dstname
;
298 rule_name_rmap
.erase(srcname
);
299 rule_name_rmap
[dstname
] = rule_id
;
304 void CrushWrapper::find_takes(set
<int> *roots
) const
306 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
307 crush_rule
*r
= crush
->rules
[i
];
310 for (unsigned j
=0; j
<r
->len
; j
++) {
311 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
)
312 roots
->insert(r
->steps
[j
].arg1
);
317 void CrushWrapper::find_takes_by_rule(int rule
, set
<int> *roots
) const
319 if (rule
< 0 || rule
>= (int)crush
->max_rules
)
321 crush_rule
*r
= crush
->rules
[rule
];
324 for (unsigned i
= 0; i
< r
->len
; i
++) {
325 if (r
->steps
[i
].op
== CRUSH_RULE_TAKE
)
326 roots
->insert(r
->steps
[i
].arg1
);
330 void CrushWrapper::find_roots(set
<int> *roots
) const
332 for (int i
= 0; i
< crush
->max_buckets
; i
++) {
333 if (!crush
->buckets
[i
])
335 crush_bucket
*b
= crush
->buckets
[i
];
336 if (!_search_item_exists(b
->id
))
337 roots
->insert(b
->id
);
341 bool CrushWrapper::subtree_contains(int root
, int item
) const
347 return false; // root is a leaf
349 const crush_bucket
*b
= get_bucket(root
);
353 for (unsigned j
=0; j
<b
->size
; j
++) {
354 if (subtree_contains(b
->items
[j
], item
))
360 bool CrushWrapper::_maybe_remove_last_instance(CephContext
*cct
, int item
, bool unlink_only
)
363 if (_search_item_exists(item
)) {
366 if (item
< 0 && _bucket_is_in_use(item
)) {
370 if (item
< 0 && !unlink_only
) {
371 crush_bucket
*t
= get_bucket(item
);
372 ldout(cct
, 5) << "_maybe_remove_last_instance removing bucket " << item
<< dendl
;
373 crush_remove_bucket(crush
, t
);
374 if (class_bucket
.count(item
) != 0)
375 class_bucket
.erase(item
);
376 class_remove_item(item
);
377 update_choose_args(cct
);
379 if ((item
>= 0 || !unlink_only
) && name_map
.count(item
)) {
380 ldout(cct
, 5) << "_maybe_remove_last_instance removing name for item " << item
<< dendl
;
381 name_map
.erase(item
);
383 if (item
>= 0 && !unlink_only
) {
384 class_remove_item(item
);
387 rebuild_roots_with_classes(cct
);
391 int CrushWrapper::remove_root(CephContext
*cct
, int item
)
393 crush_bucket
*b
= get_bucket(item
);
395 // should be idempotent
396 // e.g.: we use 'crush link' to link same host into
397 // different roots, which as a result can cause different
398 // shadow trees reference same hosts too. This means
399 // we may need to destory the same buckets(hosts, racks, etc.)
400 // multiple times during rebuilding all shadow trees.
404 for (unsigned n
= 0; n
< b
->size
; n
++) {
405 if (b
->items
[n
] >= 0)
407 int r
= remove_root(cct
, b
->items
[n
]);
412 crush_remove_bucket(crush
, b
);
413 if (name_map
.count(item
) != 0) {
414 name_map
.erase(item
);
417 if (class_bucket
.count(item
) != 0)
418 class_bucket
.erase(item
);
419 class_remove_item(item
);
420 update_choose_args(cct
);
424 void CrushWrapper::update_choose_args(CephContext
*cct
)
426 for (auto& i
: choose_args
) {
427 crush_choose_arg_map
&arg_map
= i
.second
;
428 assert(arg_map
.size
== (unsigned)crush
->max_buckets
);
429 unsigned positions
= get_choose_args_positions(arg_map
);
430 for (int j
= 0; j
< crush
->max_buckets
; ++j
) {
431 crush_bucket
*b
= crush
->buckets
[j
];
432 assert(j
< (int)arg_map
.size
);
433 auto& carg
= arg_map
.args
[j
];
434 // strip out choose_args for any buckets that no longer exist
435 if (!b
|| b
->alg
!= CRUSH_BUCKET_STRAW2
) {
438 ldout(cct
,10) << __func__
<< " removing " << i
.first
<< " bucket "
439 << (-1-j
) << " ids" << dendl
;
444 if (carg
.weight_set
) {
446 ldout(cct
,10) << __func__
<< " removing " << i
.first
<< " bucket "
447 << (-1-j
) << " weight_sets" << dendl
;
448 for (unsigned p
= 0; p
< carg
.weight_set_positions
; ++p
) {
449 free(carg
.weight_set
[p
].weights
);
451 free(carg
.weight_set
);
453 carg
.weight_set_positions
= 0;
457 if (carg
.weight_set_positions
== 0) {
460 if (carg
.weight_set_positions
!= positions
) {
462 lderr(cct
) << __func__
<< " " << i
.first
<< " bucket "
463 << (-1-j
) << " positions " << carg
.weight_set_positions
464 << " -> " << positions
<< dendl
;
465 continue; // wth... skip!
467 // mis-sized weight_sets? this shouldn't ever happen.
468 for (unsigned p
= 0; p
< positions
; ++p
) {
469 if (carg
.weight_set
[p
].size
!= b
->size
) {
471 lderr(cct
) << __func__
<< " fixing " << i
.first
<< " bucket "
472 << (-1-j
) << " position " << p
473 << " size " << carg
.weight_set
[p
].size
<< " -> "
475 auto old_ws
= carg
.weight_set
[p
];
476 carg
.weight_set
[p
].size
= b
->size
;
477 carg
.weight_set
[p
].weights
= (__u32
*)calloc(b
->size
, sizeof(__u32
));
478 auto max
= std::min
<unsigned>(old_ws
.size
, b
->size
);
479 for (unsigned k
= 0; k
< max
; ++k
) {
480 carg
.weight_set
[p
].weights
[k
] = old_ws
.weights
[k
];
482 free(old_ws
.weights
);
489 int CrushWrapper::remove_item(CephContext
*cct
, int item
, bool unlink_only
)
491 ldout(cct
, 5) << "remove_item " << item
492 << (unlink_only
? " unlink_only":"") << dendl
;
496 if (item
< 0 && !unlink_only
) {
497 crush_bucket
*t
= get_bucket(item
);
499 ldout(cct
, 1) << "remove_item bucket " << item
<< " does not exist"
505 ldout(cct
, 1) << "remove_item bucket " << item
<< " has " << t
->size
506 << " items, not empty" << dendl
;
509 if (_bucket_is_in_use(item
)) {
514 for (int i
= 0; i
< crush
->max_buckets
; i
++) {
515 if (!crush
->buckets
[i
])
517 crush_bucket
*b
= crush
->buckets
[i
];
519 for (unsigned i
=0; i
<b
->size
; ++i
) {
520 int id
= b
->items
[i
];
522 ldout(cct
, 5) << "remove_item removing item " << item
523 << " from bucket " << b
->id
<< dendl
;
524 adjust_item_weight_in_bucket(cct
, item
, 0, b
->id
, true);
525 bucket_remove_item(b
, item
);
531 if (_maybe_remove_last_instance(cct
, item
, unlink_only
))
537 bool CrushWrapper::_search_item_exists(int item
) const
539 for (int i
= 0; i
< crush
->max_buckets
; i
++) {
540 if (!crush
->buckets
[i
])
542 crush_bucket
*b
= crush
->buckets
[i
];
543 for (unsigned j
=0; j
<b
->size
; ++j
) {
544 if (b
->items
[j
] == item
)
551 bool CrushWrapper::_bucket_is_in_use(int item
)
553 for (auto &i
: class_bucket
)
554 for (auto &j
: i
.second
)
555 if (j
.second
== item
)
557 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
558 crush_rule
*r
= crush
->rules
[i
];
561 for (unsigned j
= 0; j
< r
->len
; ++j
) {
562 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
) {
563 int step_item
= r
->steps
[j
].arg1
;
566 int res
= split_id_class(step_item
, &original_item
, &c
);
569 if (step_item
== item
|| original_item
== item
)
577 int CrushWrapper::_remove_item_under(
578 CephContext
*cct
, int item
, int ancestor
, bool unlink_only
)
580 ldout(cct
, 5) << "_remove_item_under " << item
<< " under " << ancestor
581 << (unlink_only
? " unlink_only":"") << dendl
;
587 if (!bucket_exists(ancestor
))
592 crush_bucket
*b
= get_bucket(ancestor
);
593 for (unsigned i
=0; i
<b
->size
; ++i
) {
594 int id
= b
->items
[i
];
596 ldout(cct
, 5) << "_remove_item_under removing item " << item
597 << " from bucket " << b
->id
<< dendl
;
598 adjust_item_weight_in_bucket(cct
, item
, 0, b
->id
, true);
599 bucket_remove_item(b
, item
);
602 int r
= remove_item_under(cct
, item
, id
, unlink_only
);
610 int CrushWrapper::remove_item_under(
611 CephContext
*cct
, int item
, int ancestor
, bool unlink_only
)
613 ldout(cct
, 5) << "remove_item_under " << item
<< " under " << ancestor
614 << (unlink_only
? " unlink_only":"") << dendl
;
616 if (!unlink_only
&& _bucket_is_in_use(item
)) {
620 int ret
= _remove_item_under(cct
, item
, ancestor
, unlink_only
);
624 if (item
< 0 && !unlink_only
) {
625 crush_bucket
*t
= get_bucket(item
);
627 ldout(cct
, 1) << "remove_item_under bucket " << item
628 << " does not exist" << dendl
;
633 ldout(cct
, 1) << "remove_item_under bucket " << item
<< " has " << t
->size
634 << " items, not empty" << dendl
;
639 if (_maybe_remove_last_instance(cct
, item
, unlink_only
))
645 int CrushWrapper::get_common_ancestor_distance(CephContext
*cct
, int id
,
646 const std::multimap
<string
,string
>& loc
) const
648 ldout(cct
, 5) << __func__
<< " " << id
<< " " << loc
<< dendl
;
649 if (!item_exists(id
))
651 map
<string
,string
> id_loc
= get_full_location(id
);
652 ldout(cct
, 20) << " id is at " << id_loc
<< dendl
;
654 for (map
<int,string
>::const_iterator p
= type_map
.begin();
657 map
<string
,string
>::iterator ip
= id_loc
.find(p
->second
);
658 if (ip
== id_loc
.end())
660 for (std::multimap
<string
,string
>::const_iterator q
= loc
.find(p
->second
);
663 if (q
->first
!= p
->second
)
665 if (q
->second
== ip
->second
)
672 int CrushWrapper::parse_loc_map(const std::vector
<string
>& args
,
673 std::map
<string
,string
> *ploc
)
676 for (unsigned i
= 0; i
< args
.size(); ++i
) {
677 const char *s
= args
[i
].c_str();
678 const char *pos
= strchr(s
, '=');
681 string
key(s
, 0, pos
-s
);
684 (*ploc
)[key
] = value
;
691 int CrushWrapper::parse_loc_multimap(const std::vector
<string
>& args
,
692 std::multimap
<string
,string
> *ploc
)
695 for (unsigned i
= 0; i
< args
.size(); ++i
) {
696 const char *s
= args
[i
].c_str();
697 const char *pos
= strchr(s
, '=');
700 string
key(s
, 0, pos
-s
);
703 ploc
->insert(make_pair(key
, value
));
710 bool CrushWrapper::check_item_loc(CephContext
*cct
, int item
, const map
<string
,string
>& loc
,
713 ldout(cct
, 5) << "check_item_loc item " << item
<< " loc " << loc
<< dendl
;
715 for (map
<int,string
>::const_iterator p
= type_map
.begin(); p
!= type_map
.end(); ++p
) {
720 // ignore types that aren't specified in loc
721 map
<string
,string
>::const_iterator q
= loc
.find(p
->second
);
722 if (q
== loc
.end()) {
723 ldout(cct
, 2) << "warning: did not specify location for '" << p
->second
<< "' level (levels are "
724 << type_map
<< ")" << dendl
;
728 if (!name_exists(q
->second
)) {
729 ldout(cct
, 5) << "check_item_loc bucket " << q
->second
<< " dne" << dendl
;
733 int id
= get_item_id(q
->second
);
735 ldout(cct
, 5) << "check_item_loc requested " << q
->second
<< " for type " << p
->second
736 << " is a device, not bucket" << dendl
;
740 ceph_assert(bucket_exists(id
));
741 crush_bucket
*b
= get_bucket(id
);
743 // see if item exists in this bucket
744 for (unsigned j
=0; j
<b
->size
; j
++) {
745 if (b
->items
[j
] == item
) {
746 ldout(cct
, 2) << "check_item_loc " << item
<< " exists in bucket " << b
->id
<< dendl
;
748 *weight
= crush_get_bucket_item_weight(b
, j
);
755 ldout(cct
, 2) << __func__
<< " item " << item
<< " loc " << loc
<< dendl
;
759 map
<string
, string
> CrushWrapper::get_full_location(int id
) const
761 vector
<pair
<string
, string
> > full_location_ordered
;
762 map
<string
,string
> full_location
;
764 get_full_location_ordered(id
, full_location_ordered
);
766 std::copy(full_location_ordered
.begin(),
767 full_location_ordered
.end(),
768 std::inserter(full_location
, full_location
.begin()));
770 return full_location
;
773 int CrushWrapper::get_full_location(const string
& name
,
774 map
<string
,string
> *ploc
)
777 auto p
= name_rmap
.find(name
);
778 if (p
== name_rmap
.end()) {
781 *ploc
= get_full_location(p
->second
);
785 int CrushWrapper::get_full_location_ordered(int id
, vector
<pair
<string
, string
> >& path
) const
787 if (!item_exists(id
))
792 pair
<string
, string
> parent_coord
= get_immediate_parent(cur
, &ret
);
795 path
.push_back(parent_coord
);
796 cur
= get_item_id(parent_coord
.second
);
801 string
CrushWrapper::get_full_location_ordered_string(int id
) const
803 vector
<pair
<string
, string
> > full_location_ordered
;
804 string full_location
;
805 get_full_location_ordered(id
, full_location_ordered
);
806 reverse(begin(full_location_ordered
), end(full_location_ordered
));
807 for(auto i
= full_location_ordered
.begin(); i
!= full_location_ordered
.end(); i
++) {
808 full_location
= full_location
+ i
->first
+ "=" + i
->second
;
809 if (i
!= full_location_ordered
.end() - 1) {
810 full_location
= full_location
+ ",";
813 return full_location
;
816 map
<int, string
> CrushWrapper::get_parent_hierarchy(int id
) const
818 map
<int,string
> parent_hierarchy
;
819 pair
<string
, string
> parent_coord
= get_immediate_parent(id
);
822 // get the integer type for id and create a counter from there
823 int type_counter
= get_bucket_type(id
);
825 // if we get a negative type then we can assume that we have an OSD
826 // change behavior in get_item_type FIXME
827 if (type_counter
< 0)
830 // read the type map and get the name of the type with the largest ID
832 if (!type_map
.empty())
833 high_type
= type_map
.rbegin()->first
;
835 parent_id
= get_item_id(parent_coord
.second
);
837 while (type_counter
< high_type
) {
839 parent_hierarchy
[ type_counter
] = parent_coord
.first
;
841 if (type_counter
< high_type
){
842 // get the coordinate information for the next parent
843 parent_coord
= get_immediate_parent(parent_id
);
844 parent_id
= get_item_id(parent_coord
.second
);
848 return parent_hierarchy
;
851 int CrushWrapper::get_children(int id
, list
<int> *children
) const
858 auto *b
= get_bucket(id
);
863 for (unsigned n
=0; n
<b
->size
; n
++) {
864 children
->push_back(b
->items
[n
]);
869 int CrushWrapper::get_all_children(int id
, set
<int> *children
) const
876 auto *b
= get_bucket(id
);
882 for (unsigned n
= 0; n
< b
->size
; n
++) {
883 children
->insert(b
->items
[n
]);
885 auto r
= get_all_children(b
->items
[n
], children
);
893 void CrushWrapper::get_children_of_type(int id
,
895 vector
<int> *children
,
896 bool exclude_shadow
) const
901 children
->push_back(id
);
905 auto b
= get_bucket(id
);
909 if (b
->type
< type
) {
912 } else if (b
->type
== type
) {
913 if (!is_shadow_item(b
->id
) || !exclude_shadow
) {
914 children
->push_back(b
->id
);
918 for (unsigned n
= 0; n
< b
->size
; n
++) {
919 get_children_of_type(b
->items
[n
], type
, children
, exclude_shadow
);
923 int CrushWrapper::verify_upmap(CephContext
*cct
,
926 const vector
<int>& up
)
928 auto rule
= get_rule(rule_id
);
929 if (IS_ERR(rule
) || !rule
) {
930 lderr(cct
) << __func__
<< " rule " << rule_id
<< " does not exist"
936 std::map
<int, int> type_stack
;
937 for (unsigned step
= 0; step
< rule
->len
; ++step
) {
938 auto curstep
= &rule
->steps
[step
];
939 ldout(cct
, 10) << __func__
<< " step " << step
<< dendl
;
940 switch (curstep
->op
) {
941 case CRUSH_RULE_TAKE
:
943 root_bucket
= curstep
->arg1
;
946 case CRUSH_RULE_CHOOSELEAF_FIRSTN
:
947 case CRUSH_RULE_CHOOSELEAF_INDEP
:
949 int numrep
= curstep
->arg1
;
950 int type
= curstep
->arg2
;
953 type_stack
.emplace(type
, numrep
);
954 if (type
== 0) // osd
956 map
<int, set
<int>> osds_by_parent
; // parent_of_desired_type -> osds
957 for (auto osd
: up
) {
958 auto parent
= get_parent_of_type(osd
, type
, rule_id
);
960 osds_by_parent
[parent
].insert(osd
);
962 ldout(cct
, 1) << __func__
<< " unable to get parent of osd." << osd
963 << ", skipping for now"
967 for (auto i
: osds_by_parent
) {
968 if (i
.second
.size() > 1) {
969 lderr(cct
) << __func__
<< " multiple osds " << i
.second
970 << " come from same failure domain " << i
.first
978 case CRUSH_RULE_CHOOSE_FIRSTN
:
979 case CRUSH_RULE_CHOOSE_INDEP
:
981 int numrep
= curstep
->arg1
;
982 int type
= curstep
->arg2
;
985 type_stack
.emplace(type
, numrep
);
986 if (type
== 0) // osd
988 set
<int> parents_of_type
;
989 for (auto osd
: up
) {
990 auto parent
= get_parent_of_type(osd
, type
, rule_id
);
992 parents_of_type
.insert(parent
);
994 ldout(cct
, 1) << __func__
<< " unable to get parent of osd." << osd
995 << ", skipping for now"
999 if ((int)parents_of_type
.size() > numrep
) {
1000 lderr(cct
) << __func__
<< " number of buckets "
1001 << parents_of_type
.size() << " exceeds desired " << numrep
1008 case CRUSH_RULE_EMIT
:
1010 if (root_bucket
< 0) {
1012 for (auto &item
: type_stack
) {
1013 num_osds
*= item
.second
;
1015 // validate the osd's in subtree
1016 for (int c
= 0; cursor
< (int)up
.size() && c
< num_osds
; ++cursor
, ++c
) {
1017 int osd
= up
[cursor
];
1018 if (!subtree_contains(root_bucket
, osd
)) {
1019 lderr(cct
) << __func__
<< " osd " << osd
<< " not in bucket " << root_bucket
<< dendl
;
1036 int CrushWrapper::_get_leaves(int id
, list
<int> *leaves
) const
1038 ceph_assert(leaves
);
1042 leaves
->push_back(id
);
1046 auto b
= get_bucket(id
);
1051 for (unsigned n
= 0; n
< b
->size
; n
++) {
1052 if (b
->items
[n
] >= 0) {
1053 leaves
->push_back(b
->items
[n
]);
1055 // is a bucket, do recursive call
1056 int r
= _get_leaves(b
->items
[n
], leaves
);
1063 return 0; // all is well
1066 int CrushWrapper::get_leaves(const string
&name
, set
<int> *leaves
) const
1068 ceph_assert(leaves
);
1071 if (!name_exists(name
)) {
1075 int id
= get_item_id(name
);
1082 list
<int> unordered
;
1083 int r
= _get_leaves(id
, &unordered
);
1088 for (auto &p
: unordered
) {
1095 int CrushWrapper::insert_item(
1096 CephContext
*cct
, int item
, float weight
, string name
,
1097 const map
<string
,string
>& loc
, // typename -> bucketname
1098 bool init_weight_sets
)
1100 ldout(cct
, 5) << "insert_item item " << item
<< " weight " << weight
1101 << " name " << name
<< " loc " << loc
<< dendl
;
1103 if (!is_valid_crush_name(name
))
1106 if (!is_valid_crush_loc(cct
, loc
))
1109 int r
= validate_weightf(weight
);
1114 if (name_exists(name
)) {
1115 if (get_item_id(name
) != item
) {
1116 ldout(cct
, 10) << "device name '" << name
<< "' already exists as id "
1117 << get_item_id(name
) << dendl
;
1121 set_item_name(item
, name
);
1126 // create locations if locations don't exist and add child in
1127 // location with 0 weight the more detail in the insert_item method
1128 // declaration in CrushWrapper.h
1129 for (auto p
= type_map
.begin(); p
!= type_map
.end(); ++p
) {
1130 // ignore device type
1134 // skip types that are unspecified
1135 map
<string
,string
>::const_iterator q
= loc
.find(p
->second
);
1136 if (q
== loc
.end()) {
1137 ldout(cct
, 2) << "warning: did not specify location for '"
1138 << p
->second
<< "' level (levels are "
1139 << type_map
<< ")" << dendl
;
1143 if (!name_exists(q
->second
)) {
1144 ldout(cct
, 5) << "insert_item creating bucket " << q
->second
<< dendl
;
1145 int empty
= 0, newid
;
1146 int r
= add_bucket(0, 0,
1147 CRUSH_HASH_DEFAULT
, p
->first
, 1, &cur
, &empty
, &newid
);
1149 ldout(cct
, 1) << "add_bucket failure error: " << cpp_strerror(r
)
1153 set_item_name(newid
, q
->second
);
1159 // add to an existing bucket
1160 int id
= get_item_id(q
->second
);
1161 if (!bucket_exists(id
)) {
1162 ldout(cct
, 1) << "insert_item doesn't have bucket " << id
<< dendl
;
1166 // check that we aren't creating a cycle.
1167 if (subtree_contains(id
, cur
)) {
1168 ldout(cct
, 1) << "insert_item item " << cur
<< " already exists beneath "
1173 // we have done sanity check above
1174 crush_bucket
*b
= get_bucket(id
);
1176 if (p
->first
!= b
->type
) {
1177 ldout(cct
, 1) << "insert_item existing bucket has type "
1178 << "'" << type_map
[b
->type
] << "' != "
1179 << "'" << type_map
[p
->first
] << "'" << dendl
;
1183 // are we forming a loop?
1184 if (subtree_contains(cur
, b
->id
)) {
1185 ldout(cct
, 1) << "insert_item " << cur
<< " already contains " << b
->id
1186 << "; cannot form loop" << dendl
;
1190 ldout(cct
, 5) << "insert_item adding " << cur
<< " weight " << weight
1191 << " to bucket " << id
<< dendl
;
1192 [[maybe_unused
]] int r
= bucket_add_item(b
, cur
, 0);
1197 // adjust the item's weight in location
1198 if (adjust_item_weightf_in_loc(cct
, item
, weight
, loc
,
1199 item
>= 0 && init_weight_sets
) > 0) {
1200 if (item
>= crush
->max_devices
) {
1201 crush
->max_devices
= item
+ 1;
1202 ldout(cct
, 5) << "insert_item max_devices now " << crush
->max_devices
1205 r
= rebuild_roots_with_classes(cct
);
1207 ldout(cct
, 0) << __func__
<< " unable to rebuild roots with classes: "
1208 << cpp_strerror(r
) << dendl
;
1214 ldout(cct
, 1) << "error: didn't find anywhere to add item " << item
1215 << " in " << loc
<< dendl
;
1220 int CrushWrapper::move_bucket(
1221 CephContext
*cct
, int id
, const map
<string
,string
>& loc
)
1223 // sorry this only works for buckets
1227 if (!item_exists(id
))
1230 // get the name of the bucket we are trying to move for later
1231 string id_name
= get_item_name(id
);
1233 // detach the bucket
1234 int bucket_weight
= detach_bucket(cct
, id
);
1236 // insert the bucket back into the hierarchy
1237 return insert_item(cct
, id
, bucket_weight
/ (float)0x10000, id_name
, loc
,
1241 int CrushWrapper::detach_bucket(CephContext
*cct
, int item
)
1249 // check that the bucket that we want to detach exists
1250 ceph_assert(bucket_exists(item
));
1252 // get the bucket's weight
1253 crush_bucket
*b
= get_bucket(item
);
1254 unsigned bucket_weight
= b
->weight
;
1256 // get where the bucket is located
1257 pair
<string
, string
> bucket_location
= get_immediate_parent(item
);
1259 // get the id of the parent bucket
1260 int parent_id
= get_item_id(bucket_location
.second
);
1262 // get the parent bucket
1263 crush_bucket
*parent_bucket
= get_bucket(parent_id
);
1265 if (!IS_ERR(parent_bucket
)) {
1266 // zero out the bucket weight
1267 adjust_item_weight_in_bucket(cct
, item
, 0, parent_bucket
->id
, true);
1269 // remove the bucket from the parent
1270 bucket_remove_item(parent_bucket
, item
);
1271 } else if (PTR_ERR(parent_bucket
) != -ENOENT
) {
1272 return PTR_ERR(parent_bucket
);
1275 // check that we're happy
1276 int test_weight
= 0;
1277 map
<string
,string
> test_location
;
1278 test_location
[ bucket_location
.first
] = (bucket_location
.second
);
1280 bool successful_detach
= !(check_item_loc(cct
, item
, test_location
,
1282 ceph_assert(successful_detach
);
1283 ceph_assert(test_weight
== 0);
1285 return bucket_weight
;
1288 bool CrushWrapper::is_parent_of(int child
, int p
) const
1291 while (!get_immediate_parent_id(child
, &parent
)) {
1300 int CrushWrapper::swap_bucket(CephContext
*cct
, int src
, int dst
)
1302 if (src
>= 0 || dst
>= 0)
1304 if (!item_exists(src
) || !item_exists(dst
))
1306 crush_bucket
*a
= get_bucket(src
);
1307 crush_bucket
*b
= get_bucket(dst
);
1308 if (is_parent_of(a
->id
, b
->id
) || is_parent_of(b
->id
, a
->id
)) {
1311 unsigned aw
= a
->weight
;
1312 unsigned bw
= b
->weight
;
1315 adjust_item_weight(cct
, a
->id
, bw
);
1316 adjust_item_weight(cct
, b
->id
, aw
);
1319 map
<int,unsigned> tmp
;
1320 unsigned as
= a
->size
;
1321 unsigned bs
= b
->size
;
1322 for (unsigned i
= 0; i
< as
; ++i
) {
1323 int item
= a
->items
[0];
1324 int itemw
= crush_get_bucket_item_weight(a
, 0);
1326 bucket_remove_item(a
, item
);
1328 ceph_assert(a
->size
== 0);
1329 ceph_assert(b
->size
== bs
);
1330 for (unsigned i
= 0; i
< bs
; ++i
) {
1331 int item
= b
->items
[0];
1332 int itemw
= crush_get_bucket_item_weight(b
, 0);
1333 bucket_remove_item(b
, item
);
1334 bucket_add_item(a
, item
, itemw
);
1336 ceph_assert(a
->size
== bs
);
1337 ceph_assert(b
->size
== 0);
1338 for (auto t
: tmp
) {
1339 bucket_add_item(b
, t
.first
, t
.second
);
1341 ceph_assert(a
->size
== bs
);
1342 ceph_assert(b
->size
== as
);
1345 swap_names(src
, dst
);
1346 return rebuild_roots_with_classes(cct
);
1349 int CrushWrapper::link_bucket(
1350 CephContext
*cct
, int id
, const map
<string
,string
>& loc
)
1352 // sorry this only works for buckets
1356 if (!item_exists(id
))
1359 // get the name of the bucket we are trying to move for later
1360 string id_name
= get_item_name(id
);
1362 crush_bucket
*b
= get_bucket(id
);
1363 unsigned bucket_weight
= b
->weight
;
1365 return insert_item(cct
, id
, bucket_weight
/ (float)0x10000, id_name
, loc
);
1368 int CrushWrapper::create_or_move_item(
1369 CephContext
*cct
, int item
, float weight
, string name
,
1370 const map
<string
,string
>& loc
, // typename -> bucketname
1371 bool init_weight_sets
)
1376 if (!is_valid_crush_name(name
))
1379 if (check_item_loc(cct
, item
, loc
, &old_iweight
)) {
1380 ldout(cct
, 5) << "create_or_move_item " << item
<< " already at " << loc
1383 if (_search_item_exists(item
)) {
1384 weight
= get_item_weightf(item
);
1385 ldout(cct
, 10) << "create_or_move_item " << item
1386 << " exists with weight " << weight
<< dendl
;
1387 remove_item(cct
, item
, true);
1389 ldout(cct
, 5) << "create_or_move_item adding " << item
1390 << " weight " << weight
1391 << " at " << loc
<< dendl
;
1392 ret
= insert_item(cct
, item
, weight
, name
, loc
,
1393 item
>= 0 && init_weight_sets
);
1400 int CrushWrapper::update_item(
1401 CephContext
*cct
, int item
, float weight
, string name
,
1402 const map
<string
,string
>& loc
) // typename -> bucketname
1404 ldout(cct
, 5) << "update_item item " << item
<< " weight " << weight
1405 << " name " << name
<< " loc " << loc
<< dendl
;
1408 if (!is_valid_crush_name(name
))
1411 if (!is_valid_crush_loc(cct
, loc
))
1414 ret
= validate_weightf(weight
);
1419 // compare quantized (fixed-point integer) weights!
1420 int iweight
= (int)(weight
* (float)0x10000);
1422 if (check_item_loc(cct
, item
, loc
, &old_iweight
)) {
1423 ldout(cct
, 5) << "update_item " << item
<< " already at " << loc
<< dendl
;
1424 if (old_iweight
!= iweight
) {
1425 ldout(cct
, 5) << "update_item " << item
<< " adjusting weight "
1426 << ((float)old_iweight
/(float)0x10000) << " -> " << weight
1428 adjust_item_weight_in_loc(cct
, item
, iweight
, loc
);
1429 ret
= rebuild_roots_with_classes(cct
);
1431 ldout(cct
, 0) << __func__
<< " unable to rebuild roots with classes: "
1432 << cpp_strerror(ret
) << dendl
;
1437 if (get_item_name(item
) != name
) {
1438 ldout(cct
, 5) << "update_item setting " << item
<< " name to " << name
1440 set_item_name(item
, name
);
1444 if (item_exists(item
)) {
1445 remove_item(cct
, item
, true);
1447 ldout(cct
, 5) << "update_item adding " << item
<< " weight " << weight
1448 << " at " << loc
<< dendl
;
1449 ret
= insert_item(cct
, item
, weight
, name
, loc
);
1456 int CrushWrapper::get_item_weight(int id
) const
1458 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1459 crush_bucket
*b
= crush
->buckets
[bidx
];
1464 for (unsigned i
= 0; i
< b
->size
; i
++)
1465 if (b
->items
[i
] == id
)
1466 return crush_get_bucket_item_weight(b
, i
);
1471 int CrushWrapper::get_item_weight_in_loc(int id
, const map
<string
,string
> &loc
)
1473 for (map
<string
,string
>::const_iterator l
= loc
.begin(); l
!= loc
.end(); ++l
) {
1475 int bid
= get_item_id(l
->second
);
1476 if (!bucket_exists(bid
))
1478 crush_bucket
*b
= get_bucket(bid
);
1479 for (unsigned int i
= 0; i
< b
->size
; i
++) {
1480 if (b
->items
[i
] == id
) {
1481 return crush_get_bucket_item_weight(b
, i
);
1488 int CrushWrapper::adjust_item_weight(CephContext
*cct
, int id
, int weight
,
1489 bool update_weight_sets
)
1491 ldout(cct
, 5) << __func__
<< " " << id
<< " weight " << weight
1492 << " update_weight_sets=" << (int)update_weight_sets
1495 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1496 if (!crush
->buckets
[bidx
]) {
1499 int r
= adjust_item_weight_in_bucket(cct
, id
, weight
, -1-bidx
,
1500 update_weight_sets
);
1511 int CrushWrapper::adjust_item_weight_in_bucket(
1512 CephContext
*cct
, int id
, int weight
,
1514 bool update_weight_sets
)
1516 ldout(cct
, 5) << __func__
<< " " << id
<< " weight " << weight
1517 << " in bucket " << bucket_id
1518 << " update_weight_sets=" << (int)update_weight_sets
1521 if (!bucket_exists(bucket_id
)) {
1524 crush_bucket
*b
= get_bucket(bucket_id
);
1525 for (unsigned int i
= 0; i
< b
->size
; i
++) {
1526 if (b
->items
[i
] == id
) {
1527 int diff
= bucket_adjust_item_weight(cct
, b
, id
, weight
,
1528 update_weight_sets
);
1529 ldout(cct
, 5) << __func__
<< " " << id
<< " diff " << diff
1530 << " in bucket " << bucket_id
<< dendl
;
1531 adjust_item_weight(cct
, bucket_id
, b
->weight
, false);
1535 // update weight-sets so they continue to sum
1536 for (auto& p
: choose_args
) {
1537 auto &cmap
= p
.second
;
1541 crush_choose_arg
*arg
= &cmap
.args
[-1 - bucket_id
];
1542 if (!arg
->weight_set
) {
1545 ceph_assert(arg
->weight_set_positions
> 0);
1546 vector
<int> w(arg
->weight_set_positions
);
1547 for (unsigned i
= 0; i
< b
->size
; ++i
) {
1548 for (unsigned j
= 0; j
< arg
->weight_set_positions
; ++j
) {
1549 crush_weight_set
*weight_set
= &arg
->weight_set
[j
];
1550 w
[j
] += weight_set
->weights
[i
];
1553 ldout(cct
,5) << __func__
<< " adjusting bucket " << bucket_id
1554 << " cmap " << p
.first
<< " weights to " << w
<< dendl
;
1556 choose_args_adjust_item_weight(cct
, cmap
, bucket_id
, w
, &ss
);
1564 int CrushWrapper::adjust_item_weight_in_loc(
1565 CephContext
*cct
, int id
, int weight
,
1566 const map
<string
,string
>& loc
,
1567 bool update_weight_sets
)
1569 ldout(cct
, 5) << "adjust_item_weight_in_loc " << id
<< " weight " << weight
1571 << " update_weight_sets=" << (int)update_weight_sets
1574 for (auto l
= loc
.begin(); l
!= loc
.end(); ++l
) {
1575 int bid
= get_item_id(l
->second
);
1576 if (!bucket_exists(bid
))
1578 int r
= adjust_item_weight_in_bucket(cct
, id
, weight
, bid
,
1579 update_weight_sets
);
1590 int CrushWrapper::adjust_subtree_weight(CephContext
*cct
, int id
, int weight
,
1591 bool update_weight_sets
)
1593 ldout(cct
, 5) << __func__
<< " " << id
<< " weight " << weight
<< dendl
;
1594 crush_bucket
*b
= get_bucket(id
);
1598 list
<crush_bucket
*> q
;
1600 while (!q
.empty()) {
1603 int local_changed
= 0;
1604 for (unsigned i
=0; i
<b
->size
; ++i
) {
1605 int n
= b
->items
[i
];
1607 adjust_item_weight_in_bucket(cct
, n
, weight
, b
->id
, update_weight_sets
);
1611 crush_bucket
*sub
= get_bucket(n
);
1618 int ret
= rebuild_roots_with_classes(cct
);
1620 ldout(cct
, 0) << __func__
<< " unable to rebuild roots with classes: "
1621 << cpp_strerror(ret
) << dendl
;
1627 bool CrushWrapper::check_item_present(int id
) const
1631 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1632 crush_bucket
*b
= crush
->buckets
[bidx
];
1635 for (unsigned i
= 0; i
< b
->size
; i
++)
1636 if (b
->items
[i
] == id
)
1643 pair
<string
,string
> CrushWrapper::get_immediate_parent(int id
, int *_ret
) const
1646 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1647 crush_bucket
*b
= crush
->buckets
[bidx
];
1650 if (is_shadow_item(b
->id
))
1652 for (unsigned i
= 0; i
< b
->size
; i
++)
1653 if (b
->items
[i
] == id
) {
1654 string parent_id
= name_map
.at(b
->id
);
1655 string parent_bucket_type
= type_map
.at(b
->type
);
1658 return make_pair(parent_bucket_type
, parent_id
);
1665 return pair
<string
, string
>();
1668 int CrushWrapper::get_immediate_parent_id(int id
, int *parent
) const
1670 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1671 crush_bucket
*b
= crush
->buckets
[bidx
];
1674 if (is_shadow_item(b
->id
))
1676 for (unsigned i
= 0; i
< b
->size
; i
++) {
1677 if (b
->items
[i
] == id
) {
1686 int CrushWrapper::get_parent_of_type(int item
, int type
, int rule
) const
1689 // no rule specified
1691 int r
= get_immediate_parent_id(item
, &item
);
1695 } while (get_bucket_type(item
) != type
);
1699 find_takes_by_rule(rule
, &roots
);
1700 for (auto root
: roots
) {
1701 vector
<int> candidates
;
1702 get_children_of_type(root
, type
, &candidates
, false);
1703 for (auto candidate
: candidates
) {
1704 if (subtree_contains(candidate
, item
)) {
1705 // note that here we assure that no two different buckets
1706 // from a single crush rule will share a same device,
1707 // which should generally be true.
1712 return 0; // not found
1715 void CrushWrapper::get_subtree_of_type(int type
, vector
<int> *subtrees
)
1719 for (auto r
: roots
) {
1720 crush_bucket
*b
= get_bucket(r
);
1723 get_children_of_type(b
->id
, type
, subtrees
);
1727 bool CrushWrapper::class_is_in_use(int class_id
, ostream
*ss
)
1729 list
<unsigned> rules
;
1730 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
1731 crush_rule
*r
= crush
->rules
[i
];
1734 for (unsigned j
= 0; j
< r
->len
; ++j
) {
1735 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
) {
1736 int root
= r
->steps
[j
].arg1
;
1737 for (auto &p
: class_bucket
) {
1739 if (q
.count(class_id
) && q
[class_id
] == root
) {
1746 if (rules
.empty()) {
1751 for (auto &p
: rules
) {
1752 os
<< "'" << get_rule_name(p
) <<"',";
1754 string
out(os
.str());
1755 out
.resize(out
.size() - 1); // drop last ','
1756 *ss
<< "still referenced by crush_rule(s): " << out
;
1761 int CrushWrapper::rename_class(const string
& srcname
, const string
& dstname
)
1763 auto i
= class_rname
.find(srcname
);
1764 if (i
== class_rname
.end())
1766 auto j
= class_rname
.find(dstname
);
1767 if (j
!= class_rname
.end())
1770 int class_id
= i
->second
;
1771 ceph_assert(class_name
.count(class_id
));
1772 // rename any shadow buckets of old class name
1773 for (auto &it
: class_map
) {
1774 if (it
.first
< 0 && it
.second
== class_id
) {
1775 string old_name
= get_item_name(it
.first
);
1776 size_t pos
= old_name
.find("~");
1777 ceph_assert(pos
!= string::npos
);
1778 string name_no_class
= old_name
.substr(0, pos
);
1779 string old_class_name
= old_name
.substr(pos
+ 1);
1780 ceph_assert(old_class_name
== srcname
);
1781 string new_name
= name_no_class
+ "~" + dstname
;
1782 // we do not use set_item_name
1783 // because the name is intentionally invalid
1784 name_map
[it
.first
] = new_name
;
1790 class_rname
.erase(srcname
);
1791 class_name
.erase(class_id
);
1792 class_rname
[dstname
] = class_id
;
1793 class_name
[class_id
] = dstname
;
1797 int CrushWrapper::populate_classes(
1798 const std::map
<int32_t, map
<int32_t, int32_t>>& old_class_bucket
)
1800 // build set of previous used shadow ids
1801 set
<int32_t> used_ids
;
1802 for (auto& p
: old_class_bucket
) {
1803 for (auto& q
: p
.second
) {
1804 used_ids
.insert(q
.second
);
1807 // accumulate weight values for each carg and bucket as we go. because it is
1808 // depth first, we will have the nested bucket weights we need when we
1809 // finish constructing the containing buckets.
1810 map
<int,map
<int,vector
<int>>> cmap_item_weight
; // cargs -> bno -> [bucket weight for each position]
1812 find_nonshadow_roots(&roots
);
1813 for (auto &r
: roots
) {
1816 for (auto &c
: class_name
) {
1818 int res
= device_class_clone(r
, c
.first
, old_class_bucket
, used_ids
,
1819 &clone
, &cmap_item_weight
);
1827 int CrushWrapper::trim_roots_with_class(CephContext
*cct
)
1830 find_shadow_roots(&roots
);
1831 for (auto &r
: roots
) {
1834 int res
= remove_root(cct
, r
);
1838 // there is no need to reweight because we only remove from the
1843 int32_t CrushWrapper::_alloc_class_id() const {
1844 if (class_name
.empty()) {
1847 int32_t class_id
= class_name
.rbegin()->first
+ 1;
1848 if (class_id
>= 0) {
1851 // wrapped, pick a random start and do exhaustive search
1852 uint32_t upperlimit
= std::numeric_limits
<int32_t>::max();
1854 class_id
= rand() % upperlimit
;
1855 const auto start
= class_id
;
1857 if (!class_name
.count(class_id
)) {
1865 } while (class_id
!= start
);
1866 ceph_abort_msg("no available class id");
1869 int CrushWrapper::set_subtree_class(
1870 const string
& subtree
,
1871 const string
& new_class
)
1873 if (!name_exists(subtree
)) {
1877 int new_class_id
= get_or_create_class_id(new_class
);
1878 int id
= get_item_id(subtree
);
1879 list
<int> q
= { id
};
1880 while (!q
.empty()) {
1883 crush_bucket
*b
= get_bucket(id
);
1887 for (unsigned i
= 0; i
< b
->size
; ++i
) {
1888 int item
= b
->items
[i
];
1890 class_map
[item
] = new_class_id
;
1899 int CrushWrapper::reclassify(
1902 const map
<string
,string
>& classify_root
,
1903 const map
<string
,pair
<string
,string
>>& classify_bucket
1906 map
<int,string
> reclassified_bucket
; // orig_id -> class
1909 for (auto& i
: classify_root
) {
1910 string root
= i
.first
;
1911 if (!name_exists(root
)) {
1912 out
<< "root " << root
<< " does not exist" << std::endl
;
1915 int root_id
= get_item_id(root
);
1916 string new_class
= i
.second
;
1917 int new_class_id
= get_or_create_class_id(new_class
);
1918 out
<< "classify_root " << root
<< " (" << root_id
1919 << ") as " << new_class
<< std::endl
;
1922 for (unsigned j
= 0; j
< crush
->max_rules
; j
++) {
1923 if (crush
->rules
[j
]) {
1924 auto rule
= crush
->rules
[j
];
1925 for (unsigned k
= 0; k
< rule
->len
; ++k
) {
1926 if (rule
->steps
[k
].op
== CRUSH_RULE_TAKE
) {
1927 int step_item
= get_rule_arg1(j
, k
);
1930 int res
= split_id_class(step_item
, &original_item
, &c
);
1934 if (original_item
== root_id
) {
1935 out
<< " rule " << j
<< " includes take on root "
1936 << root
<< " class " << c
<< std::endl
;
1945 // rebuild new buckets for root
1946 //cout << "before class_bucket: " << class_bucket << std::endl;
1947 map
<int,int> renumber
;
1949 q
.push_back(root_id
);
1950 while (!q
.empty()) {
1953 crush_bucket
*bucket
= get_bucket(id
);
1954 if (IS_ERR(bucket
)) {
1955 out
<< "cannot find bucket " << id
1956 << ": " << cpp_strerror(PTR_ERR(bucket
)) << std::endl
;
1957 return PTR_ERR(bucket
);
1961 int new_id
= get_new_bucket_id();
1962 out
<< " renumbering bucket " << id
<< " -> " << new_id
<< std::endl
;
1963 renumber
[id
] = new_id
;
1964 crush
->buckets
[-1-new_id
] = bucket
;
1965 bucket
->id
= new_id
;
1966 crush
->buckets
[-1-id
] = crush_make_bucket(crush
,
1971 crush
->buckets
[-1-id
]->id
= id
;
1972 for (auto& i
: choose_args
) {
1973 i
.second
.args
[-1-new_id
] = i
.second
.args
[-1-id
];
1974 memset(&i
.second
.args
[-1-id
], 0, sizeof(i
.second
.args
[0]));
1976 class_bucket
.erase(id
);
1977 class_bucket
[new_id
][new_class_id
] = id
;
1978 name_map
[new_id
] = string(get_item_name(id
));
1979 name_map
[id
] = string(get_item_name(id
)) + "~" + new_class
;
1981 for (unsigned j
= 0; j
< bucket
->size
; ++j
) {
1982 if (bucket
->items
[j
] < 0) {
1983 q
.push_front(bucket
->items
[j
]);
1985 // we don't reclassify the device here; if the users wants that,
1986 // they can pass --set-subtree-class separately.
1990 //cout << "mid class_bucket: " << class_bucket << std::endl;
1992 for (int i
= 0; i
< crush
->max_buckets
; ++i
) {
1993 crush_bucket
*b
= crush
->buckets
[i
];
1997 for (unsigned j
= 0; j
< b
->size
; ++j
) {
1998 if (renumber
.count(b
->items
[j
])) {
1999 b
->items
[j
] = renumber
[b
->items
[j
]];
2004 int r
= rebuild_roots_with_classes(cct
);
2006 out
<< "failed to rebuild_roots_with_classes: " << cpp_strerror(r
)
2010 //cout << "final class_bucket: " << class_bucket << std::endl;
2014 map
<int,int> send_to
; // source bucket -> dest bucket
2015 map
<int,map
<int,int>> new_class_bucket
;
2016 map
<int,string
> new_bucket_names
;
2017 map
<int,map
<string
,string
>> new_buckets
;
2018 map
<string
,int> new_bucket_by_name
;
2019 for (auto& i
: classify_bucket
) {
2020 const string
& match
= i
.first
; // prefix% or %suffix
2021 const string
& new_class
= i
.second
.first
;
2022 const string
& default_parent
= i
.second
.second
;
2023 if (!name_exists(default_parent
)) {
2024 out
<< "default parent " << default_parent
<< " does not exist"
2028 int default_parent_id
= get_item_id(default_parent
);
2029 crush_bucket
*default_parent_bucket
= get_bucket(default_parent_id
);
2030 assert(default_parent_bucket
);
2031 string default_parent_type_name
= get_type_name(default_parent_bucket
->type
);
2033 out
<< "classify_bucket " << match
<< " as " << new_class
2034 << " default bucket " << default_parent
2035 << " (" << default_parent_type_name
<< ")" << std::endl
;
2037 int new_class_id
= get_or_create_class_id(new_class
);
2038 for (int j
= 0; j
< crush
->max_buckets
; ++j
) {
2039 crush_bucket
*b
= crush
->buckets
[j
];
2040 if (!b
|| is_shadow_item(b
->id
)) {
2043 string name
= get_item_name(b
->id
);
2044 if (name
.length() < match
.length()) {
2048 if (match
[0] == '%') {
2049 if (match
.substr(1) != name
.substr(name
.size() - match
.size() + 1)) {
2052 basename
= name
.substr(0, name
.size() - match
.size() + 1);
2053 } else if (match
[match
.size() - 1] == '%') {
2054 if (match
.substr(0, match
.size() - 1) !=
2055 name
.substr(0, match
.size() - 1)) {
2058 basename
= name
.substr(match
.size() - 1);
2059 } else if (match
== name
) {
2060 basename
= default_parent
;
2064 cout
<< "match " << match
<< " to " << name
<< " basename " << basename
2066 // look up or create basename bucket
2068 if (name_exists(basename
)) {
2069 base_id
= get_item_id(basename
);
2070 cout
<< " have base " << base_id
<< std::endl
;
2071 } else if (new_bucket_by_name
.count(basename
)) {
2072 base_id
= new_bucket_by_name
[basename
];
2073 cout
<< " already creating base " << base_id
<< std::endl
;
2075 base_id
= get_new_bucket_id();
2076 crush
->buckets
[-1-base_id
] = crush_make_bucket(crush
,
2081 crush
->buckets
[-1-base_id
]->id
= base_id
;
2082 name_map
[base_id
] = basename
;
2083 new_bucket_by_name
[basename
] = base_id
;
2084 cout
<< " created base " << base_id
<< std::endl
;
2086 new_buckets
[base_id
][default_parent_type_name
] = default_parent
;
2088 send_to
[b
->id
] = base_id
;
2089 new_class_bucket
[base_id
][new_class_id
] = b
->id
;
2090 new_bucket_names
[b
->id
] = basename
+ "~" + get_class_name(new_class_id
);
2092 // make sure devices are classified
2093 for (unsigned i
= 0; i
< b
->size
; ++i
) {
2094 int item
= b
->items
[i
];
2096 class_map
[item
] = new_class_id
;
2102 // no name_exists() works below,
2105 // copy items around
2106 //cout << "send_to " << send_to << std::endl;
2109 for (auto& i
: send_to
) {
2110 crush_bucket
*from
= get_bucket(i
.first
);
2111 crush_bucket
*to
= get_bucket(i
.second
);
2112 cout
<< "moving items from " << from
->id
<< " (" << get_item_name(from
->id
)
2113 << ") to " << to
->id
<< " (" << get_item_name(to
->id
) << ")"
2115 for (unsigned j
= 0; j
< from
->size
; ++j
) {
2116 int item
= from
->items
[j
];
2118 map
<string
,string
> to_loc
;
2119 to_loc
[get_type_name(to
->type
)] = get_item_name(to
->id
);
2121 if (subtree_contains(to
->id
, item
)) {
2124 map
<string
,string
> from_loc
;
2125 from_loc
[get_type_name(from
->type
)] = get_item_name(from
->id
);
2126 auto w
= get_item_weightf_in_loc(item
, from_loc
);
2127 r
= insert_item(cct
, item
,
2129 get_item_name(item
),
2132 if (!send_to
.count(item
)) {
2133 lderr(cct
) << "item " << item
<< " in bucket " << from
->id
2134 << " is not also a reclassified bucket" << dendl
;
2137 int newitem
= send_to
[item
];
2138 if (subtree_contains(to
->id
, newitem
)) {
2141 r
= link_bucket(cct
, newitem
, to_loc
);
2144 cout
<< __func__
<< " err from insert_item: " << cpp_strerror(r
)
2151 // make sure new buckets have parents
2152 for (auto& i
: new_buckets
) {
2154 if (get_immediate_parent_id(i
.first
, &parent
) < 0) {
2155 cout
<< "new bucket " << i
.first
<< " missing parent, adding at "
2156 << i
.second
<< std::endl
;
2157 int r
= link_bucket(cct
, i
.first
, i
.second
);
2159 cout
<< __func__
<< " err from insert_item: " << cpp_strerror(r
)
2166 // set class mappings
2167 //cout << "pre class_bucket: " << class_bucket << std::endl;
2168 for (auto& i
: new_class_bucket
) {
2169 for (auto& j
: i
.second
) {
2170 class_bucket
[i
.first
][j
.first
] = j
.second
;
2174 //cout << "post class_bucket: " << class_bucket << std::endl;
2175 for (auto& i
: new_bucket_names
) {
2176 name_map
[i
.first
] = i
.second
;
2179 int r
= rebuild_roots_with_classes(cct
);
2181 out
<< "failed to rebuild_roots_with_classes: " << cpp_strerror(r
)
2185 //cout << "final class_bucket: " << class_bucket << std::endl;
2190 int CrushWrapper::get_new_bucket_id()
2193 while (crush
->buckets
[-1-id
] &&
2194 -1-id
< crush
->max_buckets
) {
2197 if (-1-id
== crush
->max_buckets
) {
2198 ++crush
->max_buckets
;
2199 crush
->buckets
= (struct crush_bucket
**)realloc(
2201 sizeof(crush
->buckets
[0]) * crush
->max_buckets
);
2202 for (auto& i
: choose_args
) {
2203 assert(i
.second
.size
== (__u32
)crush
->max_buckets
- 1);
2205 i
.second
.args
= (struct crush_choose_arg
*)realloc(
2207 sizeof(i
.second
.args
[0]) * i
.second
.size
);
2213 void CrushWrapper::reweight(CephContext
*cct
)
2216 find_nonshadow_roots(&roots
);
2217 for (auto id
: roots
) {
2220 crush_bucket
*b
= get_bucket(id
);
2221 ldout(cct
, 5) << "reweight root bucket " << id
<< dendl
;
2222 int r
= crush_reweight_bucket(crush
, b
);
2223 ceph_assert(r
== 0);
2225 for (auto& i
: choose_args
) {
2226 //cout << "carg " << i.first << std::endl;
2227 vector
<uint32_t> w
; // discard top-level weights
2228 reweight_bucket(b
, i
.second
, &w
);
2231 int r
= rebuild_roots_with_classes(cct
);
2232 ceph_assert(r
== 0);
2235 void CrushWrapper::reweight_bucket(
2237 crush_choose_arg_map
& arg_map
,
2238 vector
<uint32_t> *weightv
)
2240 int idx
= -1 - b
->id
;
2241 unsigned npos
= arg_map
.args
[idx
].weight_set_positions
;
2242 //cout << __func__ << " " << b->id << " npos " << npos << std::endl;
2243 weightv
->resize(npos
);
2244 for (unsigned i
= 0; i
< b
->size
; ++i
) {
2245 int item
= b
->items
[i
];
2247 for (unsigned pos
= 0; pos
< npos
; ++pos
) {
2248 (*weightv
)[pos
] += arg_map
.args
[idx
].weight_set
->weights
[i
];
2251 vector
<uint32_t> subw(npos
);
2252 crush_bucket
*sub
= get_bucket(item
);
2254 reweight_bucket(sub
, arg_map
, &subw
);
2255 for (unsigned pos
= 0; pos
< npos
; ++pos
) {
2256 (*weightv
)[pos
] += subw
[pos
];
2257 // strash the real bucket weight as the weights for this reference
2258 arg_map
.args
[idx
].weight_set
->weights
[i
] = subw
[pos
];
2262 //cout << __func__ << " finish " << b->id << " " << *weightv << std::endl;
2265 int CrushWrapper::add_simple_rule_at(
2266 string name
, string root_name
,
2267 string failure_domain_name
,
2268 string device_class
,
2269 string mode
, int rule_type
,
2273 if (rule_exists(name
)) {
2275 *err
<< "rule " << name
<< " exists";
2279 if (rule_exists(rno
)) {
2281 *err
<< "rule with ruleno " << rno
<< " exists";
2284 if (ruleset_exists(rno
)) {
2286 *err
<< "ruleset " << rno
<< " exists";
2290 for (rno
= 0; rno
< get_max_rules(); rno
++) {
2291 if (!rule_exists(rno
) && !ruleset_exists(rno
))
2295 if (!name_exists(root_name
)) {
2297 *err
<< "root item " << root_name
<< " does not exist";
2300 int root
= get_item_id(root_name
);
2302 if (failure_domain_name
.length()) {
2303 type
= get_type_id(failure_domain_name
);
2306 *err
<< "unknown type " << failure_domain_name
;
2310 if (device_class
.size()) {
2311 if (!class_exists(device_class
)) {
2313 *err
<< "device class " << device_class
<< " does not exist";
2316 int c
= get_class_id(device_class
);
2317 if (class_bucket
.count(root
) == 0 ||
2318 class_bucket
[root
].count(c
) == 0) {
2320 *err
<< "root " << root_name
<< " has no devices with class "
2324 root
= class_bucket
[root
][c
];
2326 if (mode
!= "firstn" && mode
!= "indep") {
2328 *err
<< "unknown mode " << mode
;
2333 if (mode
== "indep")
2335 int min_rep
= mode
== "firstn" ? 1 : 3;
2336 int max_rep
= mode
== "firstn" ? 10 : 20;
2337 //set the ruleset the same as rule_id(rno)
2338 crush_rule
*rule
= crush_make_rule(steps
, rno
, rule_type
, min_rep
, max_rep
);
2341 if (mode
== "indep") {
2342 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 5, 0);
2343 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSE_TRIES
, 100, 0);
2345 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, root
, 0);
2347 crush_rule_set_step(rule
, step
++,
2348 mode
== "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN
:
2349 CRUSH_RULE_CHOOSELEAF_INDEP
,
2353 crush_rule_set_step(rule
, step
++,
2354 mode
== "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN
:
2355 CRUSH_RULE_CHOOSE_INDEP
,
2358 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
2360 int ret
= crush_add_rule(crush
, rule
, rno
);
2362 *err
<< "failed to add rule " << rno
<< " because " << cpp_strerror(ret
);
2365 set_rule_name(rno
, name
);
2370 int CrushWrapper::add_simple_rule(
2371 string name
, string root_name
,
2372 string failure_domain_name
,
2373 string device_class
,
2374 string mode
, int rule_type
,
2377 return add_simple_rule_at(name
, root_name
, failure_domain_name
, device_class
,
2379 rule_type
, -1, err
);
2382 float CrushWrapper::_get_take_weight_osd_map(int root
,
2383 map
<int,float> *pmap
) const
2388 //breadth first iterate the OSD tree
2389 while (!q
.empty()) {
2390 int bno
= q
.front();
2392 crush_bucket
*b
= crush
->buckets
[-1-bno
];
2394 for (unsigned j
=0; j
<b
->size
; ++j
) {
2395 int item_id
= b
->items
[j
];
2396 if (item_id
>= 0) { //it's an OSD
2397 float w
= crush_get_bucket_item_weight(b
, j
);
2398 (*pmap
)[item_id
] = w
;
2400 } else { //not an OSD, expand the child later
2401 q
.push_back(item_id
);
2408 void CrushWrapper::_normalize_weight_map(float sum
,
2409 const map
<int,float>& m
,
2410 map
<int,float> *pmap
) const
2413 map
<int,float>::iterator q
= pmap
->find(p
.first
);
2414 if (q
== pmap
->end()) {
2415 (*pmap
)[p
.first
] = p
.second
/ sum
;
2417 q
->second
+= p
.second
/ sum
;
2422 int CrushWrapper::get_take_weight_osd_map(int root
, map
<int,float> *pmap
) const
2425 float sum
= _get_take_weight_osd_map(root
, &m
);
2426 _normalize_weight_map(sum
, m
, pmap
);
2430 int CrushWrapper::get_rule_weight_osd_map(unsigned ruleno
,
2431 map
<int,float> *pmap
) const
2433 if (ruleno
>= crush
->max_rules
)
2435 if (crush
->rules
[ruleno
] == NULL
)
2437 crush_rule
*rule
= crush
->rules
[ruleno
];
2439 // build a weight map for each TAKE in the rule, and then merge them
2441 // FIXME: if there are multiple takes that place a different number of
2442 // objects we do not take that into account. (Also, note that doing this
2443 // right is also a function of the pool, since the crush rule
2444 // might choose 2 + choose 2 but pool size may only be 3.)
2445 for (unsigned i
=0; i
<rule
->len
; ++i
) {
2448 if (rule
->steps
[i
].op
== CRUSH_RULE_TAKE
) {
2449 int n
= rule
->steps
[i
].arg1
;
2454 sum
+= _get_take_weight_osd_map(n
, &m
);
2457 _normalize_weight_map(sum
, m
, pmap
);
2463 int CrushWrapper::remove_rule(int ruleno
)
2465 if (ruleno
>= (int)crush
->max_rules
)
2467 if (crush
->rules
[ruleno
] == NULL
)
2469 crush_destroy_rule(crush
->rules
[ruleno
]);
2470 crush
->rules
[ruleno
] = NULL
;
2471 rule_name_map
.erase(ruleno
);
2473 return rebuild_roots_with_classes(nullptr);
2476 int CrushWrapper::bucket_adjust_item_weight(
2477 CephContext
*cct
, crush_bucket
*bucket
, int item
, int weight
,
2478 bool adjust_weight_sets
)
2480 if (adjust_weight_sets
) {
2482 for (position
= 0; position
< bucket
->size
; position
++)
2483 if (bucket
->items
[position
] == item
)
2485 ceph_assert(position
!= bucket
->size
);
2486 for (auto &w
: choose_args
) {
2487 crush_choose_arg_map
&arg_map
= w
.second
;
2488 crush_choose_arg
*arg
= &arg_map
.args
[-1-bucket
->id
];
2489 for (__u32 j
= 0; j
< arg
->weight_set_positions
; j
++) {
2490 crush_weight_set
*weight_set
= &arg
->weight_set
[j
];
2491 weight_set
->weights
[position
] = weight
;
2495 return crush_bucket_adjust_item_weight(crush
, bucket
, item
, weight
);
2498 int CrushWrapper::add_bucket(
2499 int bucketno
, int alg
, int hash
, int type
, int size
,
2500 int *items
, int *weights
, int *idout
)
2503 alg
= get_default_bucket_alg();
2507 crush_bucket
*b
= crush_make_bucket(crush
, alg
, hash
, type
, size
, items
,
2511 int r
= crush_add_bucket(crush
, bucketno
, b
, idout
);
2512 int pos
= -1 - *idout
;
2513 for (auto& p
: choose_args
) {
2514 crush_choose_arg_map
& cmap
= p
.second
;
2515 unsigned new_size
= crush
->max_buckets
;
2517 if ((int)cmap
.size
< crush
->max_buckets
) {
2518 cmap
.args
= static_cast<crush_choose_arg
*>(realloc(
2520 sizeof(crush_choose_arg
) * new_size
));
2521 ceph_assert(cmap
.args
);
2522 memset(&cmap
.args
[cmap
.size
], 0,
2523 sizeof(crush_choose_arg
) * (new_size
- cmap
.size
));
2524 cmap
.size
= new_size
;
2527 cmap
.args
= static_cast<crush_choose_arg
*>(calloc(sizeof(crush_choose_arg
),
2529 ceph_assert(cmap
.args
);
2530 cmap
.size
= new_size
;
2533 int positions
= get_choose_args_positions(cmap
);
2534 crush_choose_arg
& carg
= cmap
.args
[pos
];
2535 carg
.weight_set
= static_cast<crush_weight_set
*>(calloc(sizeof(crush_weight_set
),
2537 carg
.weight_set_positions
= positions
;
2538 for (int ppos
= 0; ppos
< positions
; ++ppos
) {
2539 carg
.weight_set
[ppos
].weights
= (__u32
*)calloc(sizeof(__u32
), size
);
2540 carg
.weight_set
[ppos
].size
= size
;
2541 for (int bpos
= 0; bpos
< size
; ++bpos
) {
2542 carg
.weight_set
[ppos
].weights
[bpos
] = weights
[bpos
];
2546 assert(crush
->max_buckets
== (int)cmap
.size
);
2551 int CrushWrapper::bucket_add_item(crush_bucket
*bucket
, int item
, int weight
)
2553 __u32 new_size
= bucket
->size
+ 1;
2554 int r
= crush_bucket_add_item(crush
, bucket
, item
, weight
);
2558 for (auto &w
: choose_args
) {
2559 crush_choose_arg_map
&arg_map
= w
.second
;
2560 crush_choose_arg
*arg
= &arg_map
.args
[-1-bucket
->id
];
2561 for (__u32 j
= 0; j
< arg
->weight_set_positions
; j
++) {
2562 crush_weight_set
*weight_set
= &arg
->weight_set
[j
];
2563 weight_set
->weights
= (__u32
*)realloc(weight_set
->weights
,
2564 new_size
* sizeof(__u32
));
2565 ceph_assert(weight_set
->size
+ 1 == new_size
);
2566 weight_set
->weights
[weight_set
->size
] = weight
;
2567 weight_set
->size
= new_size
;
2569 if (arg
->ids_size
) {
2570 arg
->ids
= (__s32
*)realloc(arg
->ids
, new_size
* sizeof(__s32
));
2571 ceph_assert(arg
->ids_size
+ 1 == new_size
);
2572 arg
->ids
[arg
->ids_size
] = item
;
2573 arg
->ids_size
= new_size
;
2579 int CrushWrapper::bucket_remove_item(crush_bucket
*bucket
, int item
)
2581 __u32 new_size
= bucket
->size
- 1;
2583 for (position
= 0; position
< bucket
->size
; position
++)
2584 if (bucket
->items
[position
] == item
)
2586 ceph_assert(position
!= bucket
->size
);
2587 int r
= crush_bucket_remove_item(crush
, bucket
, item
);
2591 for (auto &w
: choose_args
) {
2592 crush_choose_arg_map
&arg_map
= w
.second
;
2593 crush_choose_arg
*arg
= &arg_map
.args
[-1-bucket
->id
];
2594 for (__u32 j
= 0; j
< arg
->weight_set_positions
; j
++) {
2595 crush_weight_set
*weight_set
= &arg
->weight_set
[j
];
2596 ceph_assert(weight_set
->size
- 1 == new_size
);
2597 for (__u32 k
= position
; k
< new_size
; k
++)
2598 weight_set
->weights
[k
] = weight_set
->weights
[k
+1];
2600 weight_set
->weights
= (__u32
*)realloc(weight_set
->weights
,
2601 new_size
* sizeof(__u32
));
2603 free(weight_set
->weights
);
2604 weight_set
->weights
= NULL
;
2606 weight_set
->size
= new_size
;
2608 if (arg
->ids_size
) {
2609 ceph_assert(arg
->ids_size
- 1 == new_size
);
2610 for (__u32 k
= position
; k
< new_size
; k
++)
2611 arg
->ids
[k
] = arg
->ids
[k
+1];
2613 arg
->ids
= (__s32
*)realloc(arg
->ids
, new_size
* sizeof(__s32
));
2618 arg
->ids_size
= new_size
;
2624 int CrushWrapper::bucket_set_alg(int bid
, int alg
)
2626 crush_bucket
*b
= get_bucket(bid
);
2634 int CrushWrapper::update_device_class(int id
,
2635 const string
& class_name
,
2639 ceph_assert(item_exists(id
));
2640 auto old_class_name
= get_item_class(id
);
2641 if (old_class_name
&& old_class_name
!= class_name
) {
2642 *ss
<< "osd." << id
<< " has already bound to class '" << old_class_name
2643 << "', can not reset class to '" << class_name
<< "'; "
2644 << "use 'ceph osd crush rm-device-class <id>' to "
2645 << "remove old class first";
2649 int class_id
= get_or_create_class_id(class_name
);
2651 *ss
<< name
<< " id " << id
<< " is negative";
2655 if (class_map
.count(id
) != 0 && class_map
[id
] == class_id
) {
2656 *ss
<< name
<< " already set to class " << class_name
<< ". ";
2660 set_item_class(id
, class_id
);
2662 int r
= rebuild_roots_with_classes(nullptr);
2668 int CrushWrapper::remove_device_class(CephContext
*cct
, int id
, ostream
*ss
)
2671 const char *name
= get_item_name(id
);
2673 *ss
<< "osd." << id
<< " does not have a name";
2677 const char *class_name
= get_item_class(id
);
2679 *ss
<< "osd." << id
<< " has not been bound to a specific class yet";
2682 class_remove_item(id
);
2684 int r
= rebuild_roots_with_classes(cct
);
2686 *ss
<< "unable to rebuild roots with class '" << class_name
<< "' "
2687 << "of osd." << id
<< ": " << cpp_strerror(r
);
2693 int CrushWrapper::device_class_clone(
2694 int original_id
, int device_class
,
2695 const std::map
<int32_t, map
<int32_t, int32_t>>& old_class_bucket
,
2696 const std::set
<int32_t>& used_ids
,
2698 map
<int,map
<int,vector
<int>>> *cmap_item_weight
)
2700 const char *item_name
= get_item_name(original_id
);
2701 if (item_name
== NULL
)
2703 const char *class_name
= get_class_name(device_class
);
2704 if (class_name
== NULL
)
2706 string copy_name
= item_name
+ string("~") + class_name
;
2707 if (name_exists(copy_name
)) {
2708 *clone
= get_item_id(copy_name
);
2712 crush_bucket
*original
= get_bucket(original_id
);
2713 ceph_assert(!IS_ERR(original
));
2714 crush_bucket
*copy
= crush_make_bucket(crush
,
2721 vector
<unsigned> item_orig_pos
; // new item pos -> orig item pos
2722 for (unsigned i
= 0; i
< original
->size
; i
++) {
2723 int item
= original
->items
[i
];
2724 int weight
= crush_get_bucket_item_weight(original
, i
);
2726 if (class_map
.count(item
) != 0 && class_map
[item
] == device_class
) {
2727 int res
= crush_bucket_add_item(crush
, copy
, item
, weight
);
2735 int res
= device_class_clone(item
, device_class
, old_class_bucket
,
2736 used_ids
, &child_copy_id
,
2740 crush_bucket
*child_copy
= get_bucket(child_copy_id
);
2741 ceph_assert(!IS_ERR(child_copy
));
2742 res
= crush_bucket_add_item(crush
, copy
, child_copy_id
,
2743 child_copy
->weight
);
2747 item_orig_pos
.push_back(i
);
2749 ceph_assert(item_orig_pos
.size() == copy
->size
);
2752 if (old_class_bucket
.count(original_id
) &&
2753 old_class_bucket
.at(original_id
).count(device_class
)) {
2754 bno
= old_class_bucket
.at(original_id
).at(device_class
);
2756 // pick a new shadow bucket id that is not used by the current map
2757 // *or* any previous shadow buckets.
2759 while (((-1-bno
) < crush
->max_buckets
&& crush
->buckets
[-1-bno
]) ||
2760 used_ids
.count(bno
)) {
2764 int res
= crush_add_bucket(crush
, bno
, copy
, clone
);
2767 ceph_assert(!bno
|| bno
== *clone
);
2769 res
= set_item_class(*clone
, device_class
);
2773 // we do not use set_item_name because the name is intentionally invalid
2774 name_map
[*clone
] = copy_name
;
2776 name_rmap
[copy_name
] = *clone
;
2777 class_bucket
[original_id
][device_class
] = *clone
;
2779 // set up choose_args for the new bucket.
2780 for (auto& w
: choose_args
) {
2781 crush_choose_arg_map
& cmap
= w
.second
;
2782 if (crush
->max_buckets
> (int)cmap
.size
) {
2783 unsigned new_size
= crush
->max_buckets
;
2784 cmap
.args
= static_cast<crush_choose_arg
*>(realloc(cmap
.args
,
2785 new_size
* sizeof(cmap
.args
[0])));
2786 ceph_assert(cmap
.args
);
2787 memset(cmap
.args
+ cmap
.size
, 0,
2788 (new_size
- cmap
.size
) * sizeof(cmap
.args
[0]));
2789 cmap
.size
= new_size
;
2791 auto& o
= cmap
.args
[-1-original_id
];
2792 auto& n
= cmap
.args
[-1-bno
];
2793 n
.ids_size
= 0; // FIXME: implement me someday
2794 n
.weight_set_positions
= o
.weight_set_positions
;
2795 n
.weight_set
= static_cast<crush_weight_set
*>(calloc(
2796 n
.weight_set_positions
, sizeof(crush_weight_set
)));
2797 for (size_t s
= 0; s
< n
.weight_set_positions
; ++s
) {
2798 n
.weight_set
[s
].size
= copy
->size
;
2799 n
.weight_set
[s
].weights
= (__u32
*)calloc(copy
->size
, sizeof(__u32
));
2801 for (size_t s
= 0; s
< n
.weight_set_positions
; ++s
) {
2802 vector
<int> bucket_weights(n
.weight_set_positions
);
2803 for (size_t i
= 0; i
< copy
->size
; ++i
) {
2804 int item
= copy
->items
[i
];
2806 n
.weight_set
[s
].weights
[i
] = o
.weight_set
[s
].weights
[item_orig_pos
[i
]];
2807 } else if ((*cmap_item_weight
)[w
.first
].count(item
)) {
2808 n
.weight_set
[s
].weights
[i
] = (*cmap_item_weight
)[w
.first
][item
][s
];
2810 n
.weight_set
[s
].weights
[i
] = 0;
2812 bucket_weights
[s
] += n
.weight_set
[s
].weights
[i
];
2814 (*cmap_item_weight
)[w
.first
][bno
] = bucket_weights
;
2820 int CrushWrapper::get_rules_by_class(const string
&class_name
, set
<int> *rules
)
2824 if (!class_exists(class_name
)) {
2827 int class_id
= get_class_id(class_name
);
2828 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
2829 crush_rule
*r
= crush
->rules
[i
];
2832 for (unsigned j
= 0; j
< r
->len
; ++j
) {
2833 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
) {
2834 int step_item
= r
->steps
[j
].arg1
;
2837 int res
= split_id_class(step_item
, &original_item
, &c
);
2841 if (c
!= -1 && c
== class_id
) {
2851 // return rules that might reference the given osd
2852 int CrushWrapper::get_rules_by_osd(int osd
, set
<int> *rules
)
2859 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
2860 crush_rule
*r
= crush
->rules
[i
];
2863 for (unsigned j
= 0; j
< r
->len
; ++j
) {
2864 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
) {
2865 int step_item
= r
->steps
[j
].arg1
;
2866 list
<int> unordered
;
2867 int rc
= _get_leaves(step_item
, &unordered
);
2869 return rc
; // propagate fatal errors!
2872 for (auto &o
: unordered
) {
2873 ceph_assert(o
>= 0);
2889 bool CrushWrapper::_class_is_dead(int class_id
)
2891 for (auto &p
: class_map
) {
2892 if (p
.first
>= 0 && p
.second
== class_id
) {
2896 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
2897 crush_rule
*r
= crush
->rules
[i
];
2900 for (unsigned j
= 0; j
< r
->len
; ++j
) {
2901 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
) {
2902 int root
= r
->steps
[j
].arg1
;
2903 for (auto &p
: class_bucket
) {
2905 if (q
.count(class_id
) && q
[class_id
] == root
) {
2912 // no more referenced by any devices or crush rules
2916 void CrushWrapper::cleanup_dead_classes()
2918 auto p
= class_name
.begin();
2919 while (p
!= class_name
.end()) {
2920 if (_class_is_dead(p
->first
)) {
2921 string n
= p
->second
;
2923 remove_class_name(n
);
2930 int CrushWrapper::rebuild_roots_with_classes(CephContext
*cct
)
2932 std::map
<int32_t, map
<int32_t, int32_t> > old_class_bucket
= class_bucket
;
2933 cleanup_dead_classes();
2934 int r
= trim_roots_with_class(cct
);
2937 class_bucket
.clear();
2938 return populate_classes(old_class_bucket
);
2941 void CrushWrapper::encode(bufferlist
& bl
, uint64_t features
) const
2946 __u32 magic
= CRUSH_MAGIC
;
2949 encode(crush
->max_buckets
, bl
);
2950 encode(crush
->max_rules
, bl
);
2951 encode(crush
->max_devices
, bl
);
2953 bool encode_compat_choose_args
= false;
2954 crush_choose_arg_map arg_map
;
2955 memset(&arg_map
, '\0', sizeof(arg_map
));
2956 if (has_choose_args() &&
2957 !HAVE_FEATURE(features
, CRUSH_CHOOSE_ARGS
)) {
2958 ceph_assert(!has_incompat_choose_args());
2959 encode_compat_choose_args
= true;
2960 arg_map
= choose_args
.begin()->second
;
2964 for (int i
=0; i
<crush
->max_buckets
; i
++) {
2966 if (crush
->buckets
[i
]) alg
= crush
->buckets
[i
]->alg
;
2971 encode(crush
->buckets
[i
]->id
, bl
);
2972 encode(crush
->buckets
[i
]->type
, bl
);
2973 encode(crush
->buckets
[i
]->alg
, bl
);
2974 encode(crush
->buckets
[i
]->hash
, bl
);
2975 encode(crush
->buckets
[i
]->weight
, bl
);
2976 encode(crush
->buckets
[i
]->size
, bl
);
2977 for (unsigned j
=0; j
<crush
->buckets
[i
]->size
; j
++)
2978 encode(crush
->buckets
[i
]->items
[j
], bl
);
2980 switch (crush
->buckets
[i
]->alg
) {
2981 case CRUSH_BUCKET_UNIFORM
:
2982 encode((reinterpret_cast<crush_bucket_uniform
*>(crush
->buckets
[i
]))->item_weight
, bl
);
2985 case CRUSH_BUCKET_LIST
:
2986 for (unsigned j
=0; j
<crush
->buckets
[i
]->size
; j
++) {
2987 encode((reinterpret_cast<crush_bucket_list
*>(crush
->buckets
[i
]))->item_weights
[j
], bl
);
2988 encode((reinterpret_cast<crush_bucket_list
*>(crush
->buckets
[i
]))->sum_weights
[j
], bl
);
2992 case CRUSH_BUCKET_TREE
:
2993 encode((reinterpret_cast<crush_bucket_tree
*>(crush
->buckets
[i
]))->num_nodes
, bl
);
2994 for (unsigned j
=0; j
<(reinterpret_cast<crush_bucket_tree
*>(crush
->buckets
[i
]))->num_nodes
; j
++)
2995 encode((reinterpret_cast<crush_bucket_tree
*>(crush
->buckets
[i
]))->node_weights
[j
], bl
);
2998 case CRUSH_BUCKET_STRAW
:
2999 for (unsigned j
=0; j
<crush
->buckets
[i
]->size
; j
++) {
3000 encode((reinterpret_cast<crush_bucket_straw
*>(crush
->buckets
[i
]))->item_weights
[j
], bl
);
3001 encode((reinterpret_cast<crush_bucket_straw
*>(crush
->buckets
[i
]))->straws
[j
], bl
);
3005 case CRUSH_BUCKET_STRAW2
:
3008 if (encode_compat_choose_args
&&
3009 arg_map
.args
[i
].weight_set_positions
> 0) {
3010 weights
= arg_map
.args
[i
].weight_set
[0].weights
;
3012 weights
= (reinterpret_cast<crush_bucket_straw2
*>(crush
->buckets
[i
]))->item_weights
;
3014 for (unsigned j
=0; j
<crush
->buckets
[i
]->size
; j
++) {
3015 encode(weights
[j
], bl
);
3027 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
3028 __u32 yes
= crush
->rules
[i
] ? 1:0;
3033 encode(crush
->rules
[i
]->len
, bl
);
3034 encode(crush
->rules
[i
]->mask
, bl
);
3035 for (unsigned j
=0; j
<crush
->rules
[i
]->len
; j
++)
3036 encode(crush
->rules
[i
]->steps
[j
], bl
);
3040 encode(type_map
, bl
);
3041 encode(name_map
, bl
);
3042 encode(rule_name_map
, bl
);
3045 encode(crush
->choose_local_tries
, bl
);
3046 encode(crush
->choose_local_fallback_tries
, bl
);
3047 encode(crush
->choose_total_tries
, bl
);
3048 encode(crush
->chooseleaf_descend_once
, bl
);
3049 encode(crush
->chooseleaf_vary_r
, bl
);
3050 encode(crush
->straw_calc_version
, bl
);
3051 encode(crush
->allowed_bucket_algs
, bl
);
3052 if (features
& CEPH_FEATURE_CRUSH_TUNABLES5
) {
3053 encode(crush
->chooseleaf_stable
, bl
);
3056 if (HAVE_FEATURE(features
, SERVER_LUMINOUS
)) {
3058 encode(class_map
, bl
);
3059 encode(class_name
, bl
);
3060 encode(class_bucket
, bl
);
3063 __u32 size
= (__u32
)choose_args
.size();
3065 for (auto c
: choose_args
) {
3066 encode(c
.first
, bl
);
3067 crush_choose_arg_map arg_map
= c
.second
;
3069 for (__u32 i
= 0; i
< arg_map
.size
; i
++) {
3070 crush_choose_arg
*arg
= &arg_map
.args
[i
];
3071 if (arg
->weight_set_positions
== 0 &&
3077 for (__u32 i
= 0; i
< arg_map
.size
; i
++) {
3078 crush_choose_arg
*arg
= &arg_map
.args
[i
];
3079 if (arg
->weight_set_positions
== 0 &&
3083 encode(arg
->weight_set_positions
, bl
);
3084 for (__u32 j
= 0; j
< arg
->weight_set_positions
; j
++) {
3085 crush_weight_set
*weight_set
= &arg
->weight_set
[j
];
3086 encode(weight_set
->size
, bl
);
3087 for (__u32 k
= 0; k
< weight_set
->size
; k
++)
3088 encode(weight_set
->weights
[k
], bl
);
3090 encode(arg
->ids_size
, bl
);
3091 for (__u32 j
= 0; j
< arg
->ids_size
; j
++)
3092 encode(arg
->ids
[j
], bl
);
3098 static void decode_32_or_64_string_map(map
<int32_t,string
>& m
, bufferlist::const_iterator
& blp
)
3108 decode(strlen
, blp
);
3110 // der, key was actually 64-bits!
3111 decode(strlen
, blp
);
3113 decode_nohead(strlen
, m
[key
], blp
);
3117 void CrushWrapper::decode(bufferlist::const_iterator
& blp
)
3124 if (magic
!= CRUSH_MAGIC
)
3125 throw ceph::buffer::malformed_input("bad magic number");
3127 decode(crush
->max_buckets
, blp
);
3128 decode(crush
->max_rules
, blp
);
3129 decode(crush
->max_devices
, blp
);
3131 // legacy tunables, unless we decode something newer
3132 set_tunables_legacy();
3136 crush
->buckets
= (crush_bucket
**)calloc(1, crush
->max_buckets
* sizeof(crush_bucket
*));
3137 for (int i
=0; i
<crush
->max_buckets
; i
++) {
3138 decode_crush_bucket(&crush
->buckets
[i
], blp
);
3142 crush
->rules
= (crush_rule
**)calloc(1, crush
->max_rules
* sizeof(crush_rule
*));
3143 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
3147 crush
->rules
[i
] = NULL
;
3153 crush
->rules
[i
] = reinterpret_cast<crush_rule
*>(calloc(1, crush_rule_size(len
)));
3154 crush
->rules
[i
]->len
= len
;
3155 decode(crush
->rules
[i
]->mask
, blp
);
3156 for (unsigned j
=0; j
<crush
->rules
[i
]->len
; j
++)
3157 decode(crush
->rules
[i
]->steps
[j
], blp
);
3161 // NOTE: we had a bug where we were incoding int instead of int32, which means the
3162 // 'key' field for these maps may be either 32 or 64 bits, depending. tolerate
3163 // both by assuming the string is always non-empty.
3164 decode_32_or_64_string_map(type_map
, blp
);
3165 decode_32_or_64_string_map(name_map
, blp
);
3166 decode_32_or_64_string_map(rule_name_map
, blp
);
3170 decode(crush
->choose_local_tries
, blp
);
3171 decode(crush
->choose_local_fallback_tries
, blp
);
3172 decode(crush
->choose_total_tries
, blp
);
3175 decode(crush
->chooseleaf_descend_once
, blp
);
3178 decode(crush
->chooseleaf_vary_r
, blp
);
3181 decode(crush
->straw_calc_version
, blp
);
3184 decode(crush
->allowed_bucket_algs
, blp
);
3187 decode(crush
->chooseleaf_stable
, blp
);
3190 decode(class_map
, blp
);
3191 decode(class_name
, blp
);
3192 for (auto &c
: class_name
)
3193 class_rname
[c
.second
] = c
.first
;
3194 decode(class_bucket
, blp
);
3197 __u32 choose_args_size
;
3198 decode(choose_args_size
, blp
);
3199 for (__u32 i
= 0; i
< choose_args_size
; i
++) {
3200 typename
decltype(choose_args
)::key_type choose_args_index
;
3201 decode(choose_args_index
, blp
);
3202 crush_choose_arg_map arg_map
;
3203 arg_map
.size
= crush
->max_buckets
;
3204 arg_map
.args
= static_cast<crush_choose_arg
*>(calloc(
3205 arg_map
.size
, sizeof(crush_choose_arg
)));
3208 for (__u32 j
= 0; j
< size
; j
++) {
3210 decode(bucket_index
, blp
);
3211 ceph_assert(bucket_index
< arg_map
.size
);
3212 crush_choose_arg
*arg
= &arg_map
.args
[bucket_index
];
3213 decode(arg
->weight_set_positions
, blp
);
3214 if (arg
->weight_set_positions
) {
3215 arg
->weight_set
= static_cast<crush_weight_set
*>(calloc(
3216 arg
->weight_set_positions
, sizeof(crush_weight_set
)));
3217 for (__u32 k
= 0; k
< arg
->weight_set_positions
; k
++) {
3218 crush_weight_set
*weight_set
= &arg
->weight_set
[k
];
3219 decode(weight_set
->size
, blp
);
3220 weight_set
->weights
= (__u32
*)calloc(
3221 weight_set
->size
, sizeof(__u32
));
3222 for (__u32 l
= 0; l
< weight_set
->size
; l
++)
3223 decode(weight_set
->weights
[l
], blp
);
3226 decode(arg
->ids_size
, blp
);
3227 if (arg
->ids_size
) {
3228 ceph_assert(arg
->ids_size
== crush
->buckets
[bucket_index
]->size
);
3229 arg
->ids
= (__s32
*)calloc(arg
->ids_size
, sizeof(__s32
));
3230 for (__u32 k
= 0; k
< arg
->ids_size
; k
++)
3231 decode(arg
->ids
[k
], blp
);
3234 choose_args
[choose_args_index
] = arg_map
;
3237 update_choose_args(nullptr); // in case we decode a legacy "corrupted" map
3241 crush_destroy(crush
);
3246 void CrushWrapper::decode_crush_bucket(crush_bucket
** bptr
, bufferlist::const_iterator
&blp
)
3258 case CRUSH_BUCKET_UNIFORM
:
3259 size
= sizeof(crush_bucket_uniform
);
3261 case CRUSH_BUCKET_LIST
:
3262 size
= sizeof(crush_bucket_list
);
3264 case CRUSH_BUCKET_TREE
:
3265 size
= sizeof(crush_bucket_tree
);
3267 case CRUSH_BUCKET_STRAW
:
3268 size
= sizeof(crush_bucket_straw
);
3270 case CRUSH_BUCKET_STRAW2
:
3271 size
= sizeof(crush_bucket_straw2
);
3276 snprintf(str
, sizeof(str
), "unsupported bucket algorithm: %d", alg
);
3277 throw ceph::buffer::malformed_input(str
);
3280 crush_bucket
*bucket
= reinterpret_cast<crush_bucket
*>(calloc(1, size
));
3283 decode(bucket
->id
, blp
);
3284 decode(bucket
->type
, blp
);
3285 decode(bucket
->alg
, blp
);
3286 decode(bucket
->hash
, blp
);
3287 decode(bucket
->weight
, blp
);
3288 decode(bucket
->size
, blp
);
3290 bucket
->items
= (__s32
*)calloc(1, bucket
->size
* sizeof(__s32
));
3291 for (unsigned j
= 0; j
< bucket
->size
; ++j
) {
3292 decode(bucket
->items
[j
], blp
);
3295 switch (bucket
->alg
) {
3296 case CRUSH_BUCKET_UNIFORM
:
3297 decode((reinterpret_cast<crush_bucket_uniform
*>(bucket
))->item_weight
, blp
);
3300 case CRUSH_BUCKET_LIST
: {
3301 crush_bucket_list
* cbl
= reinterpret_cast<crush_bucket_list
*>(bucket
);
3302 cbl
->item_weights
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
3303 cbl
->sum_weights
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
3305 for (unsigned j
= 0; j
< bucket
->size
; ++j
) {
3306 decode(cbl
->item_weights
[j
], blp
);
3307 decode(cbl
->sum_weights
[j
], blp
);
3312 case CRUSH_BUCKET_TREE
: {
3313 crush_bucket_tree
* cbt
= reinterpret_cast<crush_bucket_tree
*>(bucket
);
3314 decode(cbt
->num_nodes
, blp
);
3315 cbt
->node_weights
= (__u32
*)calloc(1, cbt
->num_nodes
* sizeof(__u32
));
3316 for (unsigned j
=0; j
<cbt
->num_nodes
; j
++) {
3317 decode(cbt
->node_weights
[j
], blp
);
3322 case CRUSH_BUCKET_STRAW
: {
3323 crush_bucket_straw
* cbs
= reinterpret_cast<crush_bucket_straw
*>(bucket
);
3324 cbs
->straws
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
3325 cbs
->item_weights
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
3326 for (unsigned j
= 0; j
< bucket
->size
; ++j
) {
3327 decode(cbs
->item_weights
[j
], blp
);
3328 decode(cbs
->straws
[j
], blp
);
3333 case CRUSH_BUCKET_STRAW2
: {
3334 crush_bucket_straw2
* cbs
= reinterpret_cast<crush_bucket_straw2
*>(bucket
);
3335 cbs
->item_weights
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
3336 for (unsigned j
= 0; j
< bucket
->size
; ++j
) {
3337 decode(cbs
->item_weights
[j
], blp
);
3343 // We should have handled this case in the first switch statement
3350 void CrushWrapper::dump(Formatter
*f
) const
3352 f
->open_array_section("devices");
3353 for (int i
=0; i
<get_max_devices(); i
++) {
3354 f
->open_object_section("device");
3355 f
->dump_int("id", i
);
3356 const char *n
= get_item_name(i
);
3358 f
->dump_string("name", n
);
3361 sprintf(name
, "device%d", i
);
3362 f
->dump_string("name", name
);
3364 const char *device_class
= get_item_class(i
);
3365 if (device_class
!= NULL
)
3366 f
->dump_string("class", device_class
);
3371 f
->open_array_section("types");
3372 int n
= get_num_type_names();
3373 for (int i
=0; n
; i
++) {
3374 const char *name
= get_type_name(i
);
3377 f
->open_object_section("type");
3378 f
->dump_int("type_id", 0);
3379 f
->dump_string("name", "device");
3385 f
->open_object_section("type");
3386 f
->dump_int("type_id", i
);
3387 f
->dump_string("name", name
);
3392 f
->open_array_section("buckets");
3393 for (int bucket
= -1; bucket
> -1-get_max_buckets(); --bucket
) {
3394 if (!bucket_exists(bucket
))
3396 f
->open_object_section("bucket");
3397 f
->dump_int("id", bucket
);
3398 if (get_item_name(bucket
))
3399 f
->dump_string("name", get_item_name(bucket
));
3400 f
->dump_int("type_id", get_bucket_type(bucket
));
3401 if (get_type_name(get_bucket_type(bucket
)))
3402 f
->dump_string("type_name", get_type_name(get_bucket_type(bucket
)));
3403 f
->dump_int("weight", get_bucket_weight(bucket
));
3404 f
->dump_string("alg", crush_bucket_alg_name(get_bucket_alg(bucket
)));
3405 f
->dump_string("hash", crush_hash_name(get_bucket_hash(bucket
)));
3406 f
->open_array_section("items");
3407 for (int j
=0; j
<get_bucket_size(bucket
); j
++) {
3408 f
->open_object_section("item");
3409 f
->dump_int("id", get_bucket_item(bucket
, j
));
3410 f
->dump_int("weight", get_bucket_item_weight(bucket
, j
));
3411 f
->dump_int("pos", j
);
3419 f
->open_array_section("rules");
3423 f
->open_object_section("tunables");
3427 dump_choose_args(f
);
3431 // depth first walker
3433 typedef CrushTreeDumper::Item Item
;
3434 const CrushWrapper
*crush
;
3435 const CrushTreeDumper::name_map_t
& weight_set_names
;
3437 explicit TreeDumper(const CrushWrapper
*crush
,
3438 const CrushTreeDumper::name_map_t
& wsnames
)
3439 : crush(crush
), weight_set_names(wsnames
) {}
3441 void dump(Formatter
*f
) {
3443 crush
->find_roots(&roots
);
3444 for (set
<int>::iterator root
= roots
.begin(); root
!= roots
.end(); ++root
) {
3445 dump_item(Item(*root
, 0, 0, crush
->get_bucket_weightf(*root
)), f
);
3450 void dump_item(const Item
& qi
, Formatter
* f
) {
3451 if (qi
.is_bucket()) {
3452 f
->open_object_section("bucket");
3453 CrushTreeDumper::dump_item_fields(crush
, weight_set_names
, qi
, f
);
3454 dump_bucket_children(qi
, f
);
3457 f
->open_object_section("device");
3458 CrushTreeDumper::dump_item_fields(crush
, weight_set_names
, qi
, f
);
3463 void dump_bucket_children(const Item
& parent
, Formatter
* f
) {
3464 f
->open_array_section("items");
3465 const int max_pos
= crush
->get_bucket_size(parent
.id
);
3466 for (int pos
= 0; pos
< max_pos
; pos
++) {
3467 int id
= crush
->get_bucket_item(parent
.id
, pos
);
3468 float weight
= crush
->get_bucket_item_weightf(parent
.id
, pos
);
3469 dump_item(Item(id
, parent
.id
, parent
.depth
+ 1, weight
), f
);
3476 void CrushWrapper::dump_tree(
3478 const CrushTreeDumper::name_map_t
& weight_set_names
) const
3481 TreeDumper(this, weight_set_names
).dump(f
);
3484 void CrushWrapper::dump_tunables(Formatter
*f
) const
3486 f
->dump_int("choose_local_tries", get_choose_local_tries());
3487 f
->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries());
3488 f
->dump_int("choose_total_tries", get_choose_total_tries());
3489 f
->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
3490 f
->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
3491 f
->dump_int("chooseleaf_stable", get_chooseleaf_stable());
3492 f
->dump_int("straw_calc_version", get_straw_calc_version());
3493 f
->dump_int("allowed_bucket_algs", get_allowed_bucket_algs());
3495 // be helpful about it
3496 if (has_jewel_tunables())
3497 f
->dump_string("profile", "jewel");
3498 else if (has_hammer_tunables())
3499 f
->dump_string("profile", "hammer");
3500 else if (has_firefly_tunables())
3501 f
->dump_string("profile", "firefly");
3502 else if (has_bobtail_tunables())
3503 f
->dump_string("profile", "bobtail");
3504 else if (has_argonaut_tunables())
3505 f
->dump_string("profile", "argonaut");
3507 f
->dump_string("profile", "unknown");
3508 f
->dump_int("optimal_tunables", (int)has_optimal_tunables());
3509 f
->dump_int("legacy_tunables", (int)has_legacy_tunables());
3511 // be helpful about minimum version required
3512 f
->dump_string("minimum_required_version", get_min_required_version());
3514 f
->dump_int("require_feature_tunables", (int)has_nondefault_tunables());
3515 f
->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2());
3516 f
->dump_int("has_v2_rules", (int)has_v2_rules());
3517 f
->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3());
3518 f
->dump_int("has_v3_rules", (int)has_v3_rules());
3519 f
->dump_int("has_v4_buckets", (int)has_v4_buckets());
3520 f
->dump_int("require_feature_tunables5", (int)has_nondefault_tunables5());
3521 f
->dump_int("has_v5_rules", (int)has_v5_rules());
3524 void CrushWrapper::dump_choose_args(Formatter
*f
) const
3526 f
->open_object_section("choose_args");
3527 for (auto c
: choose_args
) {
3528 crush_choose_arg_map arg_map
= c
.second
;
3529 f
->open_array_section(stringify(c
.first
).c_str());
3530 for (__u32 i
= 0; i
< arg_map
.size
; i
++) {
3531 crush_choose_arg
*arg
= &arg_map
.args
[i
];
3532 if (arg
->weight_set_positions
== 0 &&
3535 f
->open_object_section("choose_args");
3536 int bucket_index
= i
;
3537 f
->dump_int("bucket_id", -1-bucket_index
);
3538 if (arg
->weight_set_positions
> 0) {
3539 f
->open_array_section("weight_set");
3540 for (__u32 j
= 0; j
< arg
->weight_set_positions
; j
++) {
3541 f
->open_array_section("weights");
3542 __u32
*weights
= arg
->weight_set
[j
].weights
;
3543 __u32 size
= arg
->weight_set
[j
].size
;
3544 for (__u32 k
= 0; k
< size
; k
++) {
3545 f
->dump_float("weight", (float)weights
[k
]/(float)0x10000);
3551 if (arg
->ids_size
> 0) {
3552 f
->open_array_section("ids");
3553 for (__u32 j
= 0; j
< arg
->ids_size
; j
++)
3554 f
->dump_int("id", arg
->ids
[j
]);
3564 void CrushWrapper::dump_rules(Formatter
*f
) const
3566 for (int i
=0; i
<get_max_rules(); i
++) {
3567 if (!rule_exists(i
))
3573 void CrushWrapper::dump_rule(int ruleset
, Formatter
*f
) const
3575 f
->open_object_section("rule");
3576 f
->dump_int("rule_id", ruleset
);
3577 if (get_rule_name(ruleset
))
3578 f
->dump_string("rule_name", get_rule_name(ruleset
));
3579 f
->dump_int("ruleset", get_rule_mask_ruleset(ruleset
));
3580 f
->dump_int("type", get_rule_mask_type(ruleset
));
3581 f
->dump_int("min_size", get_rule_mask_min_size(ruleset
));
3582 f
->dump_int("max_size", get_rule_mask_max_size(ruleset
));
3583 f
->open_array_section("steps");
3584 for (int j
=0; j
<get_rule_len(ruleset
); j
++) {
3585 f
->open_object_section("step");
3586 switch (get_rule_op(ruleset
, j
)) {
3587 case CRUSH_RULE_NOOP
:
3588 f
->dump_string("op", "noop");
3590 case CRUSH_RULE_TAKE
:
3591 f
->dump_string("op", "take");
3593 int item
= get_rule_arg1(ruleset
, j
);
3594 f
->dump_int("item", item
);
3596 const char *name
= get_item_name(item
);
3597 f
->dump_string("item_name", name
? name
: "");
3600 case CRUSH_RULE_EMIT
:
3601 f
->dump_string("op", "emit");
3603 case CRUSH_RULE_CHOOSE_FIRSTN
:
3604 f
->dump_string("op", "choose_firstn");
3605 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
3606 f
->dump_string("type", get_type_name(get_rule_arg2(ruleset
, j
)));
3608 case CRUSH_RULE_CHOOSE_INDEP
:
3609 f
->dump_string("op", "choose_indep");
3610 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
3611 f
->dump_string("type", get_type_name(get_rule_arg2(ruleset
, j
)));
3613 case CRUSH_RULE_CHOOSELEAF_FIRSTN
:
3614 f
->dump_string("op", "chooseleaf_firstn");
3615 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
3616 f
->dump_string("type", get_type_name(get_rule_arg2(ruleset
, j
)));
3618 case CRUSH_RULE_CHOOSELEAF_INDEP
:
3619 f
->dump_string("op", "chooseleaf_indep");
3620 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
3621 f
->dump_string("type", get_type_name(get_rule_arg2(ruleset
, j
)));
3623 case CRUSH_RULE_SET_CHOOSE_TRIES
:
3624 f
->dump_string("op", "set_choose_tries");
3625 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
3627 case CRUSH_RULE_SET_CHOOSELEAF_TRIES
:
3628 f
->dump_string("op", "set_chooseleaf_tries");
3629 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
3632 f
->dump_int("opcode", get_rule_op(ruleset
, j
));
3633 f
->dump_int("arg1", get_rule_arg1(ruleset
, j
));
3634 f
->dump_int("arg2", get_rule_arg2(ruleset
, j
));
3642 void CrushWrapper::list_rules(Formatter
*f
) const
3644 for (int rule
= 0; rule
< get_max_rules(); rule
++) {
3645 if (!rule_exists(rule
))
3647 f
->dump_string("name", get_rule_name(rule
));
3651 void CrushWrapper::list_rules(ostream
*ss
) const
3653 for (int rule
= 0; rule
< get_max_rules(); rule
++) {
3654 if (!rule_exists(rule
))
3656 *ss
<< get_rule_name(rule
) << "\n";
3660 class CrushTreePlainDumper
: public CrushTreeDumper::Dumper
<TextTable
> {
3662 typedef CrushTreeDumper::Dumper
<TextTable
> Parent
;
3664 explicit CrushTreePlainDumper(const CrushWrapper
*crush
,
3665 const CrushTreeDumper::name_map_t
& wsnames
)
3666 : Parent(crush
, wsnames
) {}
3667 explicit CrushTreePlainDumper(const CrushWrapper
*crush
,
3668 const CrushTreeDumper::name_map_t
& wsnames
,
3670 : Parent(crush
, wsnames
, show_shadow
) {}
3673 void dump(TextTable
*tbl
) {
3674 tbl
->define_column("ID", TextTable::LEFT
, TextTable::RIGHT
);
3675 tbl
->define_column("CLASS", TextTable::LEFT
, TextTable::RIGHT
);
3676 tbl
->define_column("WEIGHT", TextTable::LEFT
, TextTable::RIGHT
);
3677 for (auto& p
: crush
->choose_args
) {
3678 if (p
.first
== CrushWrapper::DEFAULT_CHOOSE_ARGS
) {
3679 tbl
->define_column("(compat)", TextTable::LEFT
, TextTable::RIGHT
);
3682 auto q
= weight_set_names
.find(p
.first
);
3683 name
= q
!= weight_set_names
.end() ? q
->second
:
3685 tbl
->define_column(name
.c_str(), TextTable::LEFT
, TextTable::RIGHT
);
3688 tbl
->define_column("TYPE NAME", TextTable::LEFT
, TextTable::LEFT
);
3693 void dump_item(const CrushTreeDumper::Item
&qi
, TextTable
*tbl
) override
{
3694 const char *c
= crush
->get_item_class(qi
.id
);
3699 << weightf_t(qi
.weight
);
3700 for (auto& p
: crush
->choose_args
) {
3701 if (qi
.parent
< 0) {
3702 const crush_choose_arg_map cmap
= crush
->choose_args_get(p
.first
);
3703 int bidx
= -1 - qi
.parent
;
3704 const crush_bucket
*b
= crush
->get_bucket(qi
.parent
);
3706 bidx
< (int)cmap
.size
&&
3707 cmap
.args
[bidx
].weight_set
&&
3708 cmap
.args
[bidx
].weight_set_positions
>= 1) {
3711 pos
< (int)cmap
.args
[bidx
].weight_set
[0].size
&&
3712 b
->items
[pos
] != qi
.id
;
3714 *tbl
<< weightf_t((float)cmap
.args
[bidx
].weight_set
[0].weights
[pos
] /
3722 for (int k
=0; k
< qi
.depth
; k
++) {
3725 if (qi
.is_bucket()) {
3726 ss
<< crush
->get_type_name(crush
->get_bucket_type(qi
.id
)) << " "
3727 << crush
->get_item_name(qi
.id
);
3729 ss
<< "osd." << qi
.id
;
3732 *tbl
<< TextTable::endrow
;
3737 class CrushTreeFormattingDumper
: public CrushTreeDumper::FormattingDumper
{
3739 typedef CrushTreeDumper::FormattingDumper Parent
;
3741 explicit CrushTreeFormattingDumper(
3742 const CrushWrapper
*crush
,
3743 const CrushTreeDumper::name_map_t
& wsnames
)
3744 : Parent(crush
, wsnames
) {}
3746 explicit CrushTreeFormattingDumper(
3747 const CrushWrapper
*crush
,
3748 const CrushTreeDumper::name_map_t
& wsnames
,
3750 : Parent(crush
, wsnames
, show_shadow
) {}
3752 void dump(Formatter
*f
) {
3753 f
->open_array_section("nodes");
3757 // There is no stray bucket whose id is a negative number, so just get
3758 // the max_id and iterate from 0 to max_id to dump stray osds.
3759 f
->open_array_section("stray");
3760 int32_t max_id
= -1;
3761 if (!crush
->name_map
.empty()) {
3762 max_id
= crush
->name_map
.rbegin()->first
;
3764 for (int32_t i
= 0; i
<= max_id
; i
++) {
3765 if (crush
->item_exists(i
) && !is_touched(i
) && should_dump(i
)) {
3766 dump_item(CrushTreeDumper::Item(i
, 0, 0, 0), f
);
3774 void CrushWrapper::dump_tree(
3777 const CrushTreeDumper::name_map_t
& weight_set_names
,
3778 bool show_shadow
) const
3782 CrushTreePlainDumper(this, weight_set_names
, show_shadow
).dump(&tbl
);
3786 CrushTreeFormattingDumper(this, weight_set_names
, show_shadow
).dump(f
);
3790 void CrushWrapper::generate_test_instances(list
<CrushWrapper
*>& o
)
3792 o
.push_back(new CrushWrapper
);
3797 * Determine the default CRUSH ruleset ID to be used with
3798 * newly created replicated pools.
3800 * @returns a ruleset ID (>=0) or -1 if no suitable ruleset found
3802 int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext
*cct
)
3804 int crush_ruleset
= cct
->_conf
.get_val
<int64_t>("osd_pool_default_crush_rule");
3805 if (crush_ruleset
< 0) {
3806 crush_ruleset
= find_first_ruleset(pg_pool_t::TYPE_REPLICATED
);
3807 } else if (!ruleset_exists(crush_ruleset
)) {
3808 crush_ruleset
= -1; // match find_first_ruleset() retval
3810 return crush_ruleset
;
3813 bool CrushWrapper::is_valid_crush_name(const string
& s
)
3817 for (string::const_iterator p
= s
.begin(); p
!= s
.end(); ++p
) {
3821 !(*p
>= '0' && *p
<= '9') &&
3822 !(*p
>= 'A' && *p
<= 'Z') &&
3823 !(*p
>= 'a' && *p
<= 'z'))
3829 bool CrushWrapper::is_valid_crush_loc(CephContext
*cct
,
3830 const map
<string
,string
>& loc
)
3832 for (map
<string
,string
>::const_iterator l
= loc
.begin(); l
!= loc
.end(); ++l
) {
3833 if (!is_valid_crush_name(l
->first
) ||
3834 !is_valid_crush_name(l
->second
)) {
3835 ldout(cct
, 1) << "loc["
3836 << l
->first
<< "] = '"
3837 << l
->second
<< "' not a valid crush name ([A-Za-z0-9_-.]+)"
3845 int CrushWrapper::_choose_type_stack(
3847 const vector
<pair
<int,int>>& stack
,
3848 const set
<int>& overfull
,
3849 const vector
<int>& underfull
,
3850 const vector
<int>& more_underfull
,
3851 const vector
<int>& orig
,
3852 vector
<int>::const_iterator
& i
,
3858 vector
<int> w
= *pw
;
3861 ldout(cct
, 10) << __func__
<< " stack " << stack
3866 ceph_assert(root_bucket
< 0);
3867 vector
<int> cumulative_fanout(stack
.size());
3869 for (int j
= (int)stack
.size() - 1; j
>= 0; --j
) {
3870 cumulative_fanout
[j
] = f
;
3871 f
*= stack
[j
].second
;
3873 ldout(cct
, 10) << __func__
<< " cumulative_fanout " << cumulative_fanout
3876 // identify underfull targets for each intermediate level.
3877 // this serves two purposes:
3878 // 1. we can tell when we are selecting a bucket that does not have any underfull
3879 // devices beneath it. that means that if the current input includes an overfull
3880 // device, we won't be able to find an underfull device with this parent to
3882 // 2. when we decide we should reject a bucket due to the above, this list gives us
3883 // a list of peers to consider that *do* have underfull devices available.. (we
3884 // are careful to pick one that has the same parent.)
3885 vector
<set
<int>> underfull_buckets
; // level -> set of buckets with >0 underfull item(s)
3886 underfull_buckets
.resize(stack
.size() - 1);
3887 for (auto osd
: underfull
) {
3889 for (int j
= (int)stack
.size() - 2; j
>= 0; --j
) {
3890 int type
= stack
[j
].first
;
3891 item
= get_parent_of_type(item
, type
, rule
);
3892 ldout(cct
, 10) << __func__
<< " underfull " << osd
<< " type " << type
3893 << " is " << item
<< dendl
;
3894 if (!subtree_contains(root_bucket
, item
)) {
3895 ldout(cct
, 20) << __func__
<< " not in root subtree " << root_bucket
<< dendl
;
3898 underfull_buckets
[j
].insert(item
);
3901 ldout(cct
, 20) << __func__
<< " underfull_buckets " << underfull_buckets
<< dendl
;
3903 for (unsigned j
= 0; j
< stack
.size(); ++j
) {
3904 int type
= stack
[j
].first
;
3905 int fanout
= stack
[j
].second
;
3906 int cum_fanout
= cumulative_fanout
[j
];
3907 ldout(cct
, 10) << " level " << j
<< ": type " << type
<< " fanout " << fanout
3908 << " cumulative " << cum_fanout
3909 << " w " << w
<< dendl
;
3912 if (i
== orig
.end()) {
3913 ldout(cct
, 10) << __func__
<< " end of orig, break 0" << dendl
;
3916 for (auto from
: w
) {
3917 ldout(cct
, 10) << " from " << from
<< dendl
;
3918 // identify leaves under each choice. we use this to check whether any of these
3919 // leaves are overfull. (if so, we need to make sure there are underfull candidates
3920 // to swap for them.)
3921 vector
<set
<int>> leaves
;
3922 leaves
.resize(fanout
);
3923 for (int pos
= 0; pos
< fanout
; ++pos
) {
3926 int item
= get_parent_of_type(*tmpi
, type
, rule
);
3929 while (n
-- && tmpi
!= orig
.end()) {
3930 leaves
[pos
].insert(*tmpi
++);
3932 ldout(cct
, 10) << __func__
<< " from " << *tmpi
<< " got " << item
3933 << " of type " << type
<< " over leaves " << leaves
[pos
] << dendl
;
3936 bool replaced
= false;
3937 if (overfull
.count(*i
)) {
3938 for (auto item
: underfull
) {
3939 ldout(cct
, 10) << __func__
<< " pos " << pos
3940 << " was " << *i
<< " considering " << item
3942 if (used
.count(item
)) {
3943 ldout(cct
, 20) << __func__
<< " in used " << used
<< dendl
;
3946 if (!subtree_contains(from
, item
)) {
3947 ldout(cct
, 20) << __func__
<< " not in subtree " << from
<< dendl
;
3950 if (std::find(orig
.begin(), orig
.end(), item
) != orig
.end()) {
3951 ldout(cct
, 20) << __func__
<< " in orig " << orig
<< dendl
;
3956 ldout(cct
, 10) << __func__
<< " pos " << pos
<< " replace "
3957 << *i
<< " -> " << item
<< dendl
;
3959 ceph_assert(i
!= orig
.end());
3964 for (auto item
: more_underfull
) {
3965 ldout(cct
, 10) << __func__
<< " more underfull pos " << pos
3966 << " was " << *i
<< " considering " << item
3968 if (used
.count(item
)) {
3969 ldout(cct
, 20) << __func__
<< " in used " << used
<< dendl
;
3972 if (!subtree_contains(from
, item
)) {
3973 ldout(cct
, 20) << __func__
<< " not in subtree " << from
<< dendl
;
3976 if (std::find(orig
.begin(), orig
.end(), item
) != orig
.end()) {
3977 ldout(cct
, 20) << __func__
<< " in orig " << orig
<< dendl
;
3982 ldout(cct
, 10) << __func__
<< " pos " << pos
<< " replace "
3983 << *i
<< " -> " << item
<< dendl
;
3985 assert(i
!= orig
.end());
3992 ldout(cct
, 10) << __func__
<< " pos " << pos
<< " keep " << *i
3994 ceph_assert(i
!= orig
.end());
3998 if (i
== orig
.end()) {
3999 ldout(cct
, 10) << __func__
<< " end of orig, break 1" << dendl
;
4004 if (j
+ 1 < stack
.size()) {
4005 // check if any buckets have overfull leaves but no underfull candidates
4006 for (int pos
= 0; pos
< fanout
; ++pos
) {
4007 if (underfull_buckets
[j
].count(o
[pos
]) == 0) {
4008 // are any leaves overfull?
4009 bool any_overfull
= false;
4010 for (auto osd
: leaves
[pos
]) {
4011 if (overfull
.count(osd
)) {
4012 any_overfull
= true;
4017 ldout(cct
, 10) << " bucket " << o
[pos
] << " has no underfull targets and "
4018 << ">0 leaves " << leaves
[pos
] << " is overfull; alts "
4019 << underfull_buckets
[j
]
4021 for (auto alt
: underfull_buckets
[j
]) {
4022 if (std::find(o
.begin(), o
.end(), alt
) == o
.end()) {
4023 // see if alt has the same parent
4025 get_parent_of_type(o
[pos
], stack
[j
-1].first
, rule
) ==
4026 get_parent_of_type(alt
, stack
[j
-1].first
, rule
)) {
4028 ldout(cct
, 10) << " replacing " << o
[pos
]
4029 << " (which has no underfull leaves) with " << alt
4031 << get_parent_of_type(alt
, stack
[j
-1].first
, rule
) << " type "
4032 << type
<< ")" << dendl
;
4034 ldout(cct
, 10) << " replacing " << o
[pos
]
4035 << " (which has no underfull leaves) with " << alt
4036 << " (first level)" << dendl
;
4040 ldout(cct
, 30) << " alt " << alt
<< " for " << o
[pos
]
4041 << " has different parent, skipping" << dendl
;
4049 if (i
== orig
.end()) {
4050 ldout(cct
, 10) << __func__
<< " end of orig, break 2" << dendl
;
4054 ldout(cct
, 10) << __func__
<< " w <- " << o
<< " was " << w
<< dendl
;
4061 int CrushWrapper::try_remap_rule(
4065 const set
<int>& overfull
,
4066 const vector
<int>& underfull
,
4067 const vector
<int>& more_underfull
,
4068 const vector
<int>& orig
,
4069 vector
<int> *out
) const
4071 const crush_map
*map
= crush
;
4072 const crush_rule
*rule
= get_rule(ruleno
);
4075 ldout(cct
, 10) << __func__
<< " ruleno " << ruleno
4076 << " numrep " << maxout
<< " overfull " << overfull
4077 << " underfull " << underfull
4078 << " more_underfull " << more_underfull
4081 vector
<int> w
; // working set
4084 auto i
= orig
.begin();
4087 vector
<pair
<int,int>> type_stack
; // (type, fan-out)
4088 int root_bucket
= 0;
4089 for (unsigned step
= 0; step
< rule
->len
; ++step
) {
4090 const crush_rule_step
*curstep
= &rule
->steps
[step
];
4091 ldout(cct
, 10) << __func__
<< " step " << step
<< " w " << w
<< dendl
;
4092 switch (curstep
->op
) {
4093 case CRUSH_RULE_TAKE
:
4094 if ((curstep
->arg1
>= 0 && curstep
->arg1
< map
->max_devices
) ||
4095 (-1-curstep
->arg1
>= 0 && -1-curstep
->arg1
< map
->max_buckets
&&
4096 map
->buckets
[-1-curstep
->arg1
])) {
4098 w
.push_back(curstep
->arg1
);
4099 root_bucket
= curstep
->arg1
;
4100 ldout(cct
, 10) << __func__
<< " take " << w
<< dendl
;
4102 ldout(cct
, 1) << " bad take value " << curstep
->arg1
<< dendl
;
4106 case CRUSH_RULE_CHOOSELEAF_FIRSTN
:
4107 case CRUSH_RULE_CHOOSELEAF_INDEP
:
4109 int numrep
= curstep
->arg1
;
4110 int type
= curstep
->arg2
;
4113 type_stack
.push_back(make_pair(type
, numrep
));
4115 type_stack
.push_back(make_pair(0, 1));
4116 int r
= _choose_type_stack(cct
, type_stack
, overfull
, underfull
, more_underfull
, orig
,
4117 i
, used
, &w
, root_bucket
, ruleno
);
4124 case CRUSH_RULE_CHOOSE_FIRSTN
:
4125 case CRUSH_RULE_CHOOSE_INDEP
:
4127 int numrep
= curstep
->arg1
;
4128 int type
= curstep
->arg2
;
4131 type_stack
.push_back(make_pair(type
, numrep
));
4135 case CRUSH_RULE_EMIT
:
4136 ldout(cct
, 10) << " emit " << w
<< dendl
;
4137 if (!type_stack
.empty()) {
4138 int r
= _choose_type_stack(cct
, type_stack
, overfull
, underfull
, more_underfull
, orig
,
4139 i
, used
, &w
, root_bucket
, ruleno
);
4144 for (auto item
: w
) {
4145 out
->push_back(item
);
4160 int CrushWrapper::_choose_args_adjust_item_weight_in_bucket(
4162 crush_choose_arg_map cmap
,
4165 const vector
<int>& weight
,
4169 int bidx
= -1 - bucketid
;
4170 crush_bucket
*b
= crush
->buckets
[bidx
];
4171 if (bidx
>= (int)cmap
.size
) {
4173 *ss
<< "no weight-set for bucket " << b
->id
;
4174 ldout(cct
, 10) << __func__
<< " no crush_choose_arg for bucket " << b
->id
4178 crush_choose_arg
*carg
= &cmap
.args
[bidx
];
4179 if (carg
->weight_set
== NULL
) {
4180 // create a weight-set for this bucket and populate it with the
4182 unsigned positions
= get_choose_args_positions(cmap
);
4183 carg
->weight_set_positions
= positions
;
4184 carg
->weight_set
= static_cast<crush_weight_set
*>(
4185 calloc(sizeof(crush_weight_set
), positions
));
4186 for (unsigned p
= 0; p
< positions
; ++p
) {
4187 carg
->weight_set
[p
].size
= b
->size
;
4188 carg
->weight_set
[p
].weights
= (__u32
*)calloc(b
->size
, sizeof(__u32
));
4189 for (unsigned i
= 0; i
< b
->size
; ++i
) {
4190 carg
->weight_set
[p
].weights
[i
] = crush_get_bucket_item_weight(b
, i
);
4195 if (carg
->weight_set_positions
!= weight
.size()) {
4197 *ss
<< "weight_set_positions != " << weight
.size() << " for bucket " << b
->id
;
4198 ldout(cct
, 10) << __func__
<< " weight_set_positions != " << weight
.size()
4199 << " for bucket " << b
->id
<< dendl
;
4202 for (unsigned i
= 0; i
< b
->size
; i
++) {
4203 if (b
->items
[i
] == id
) {
4204 for (unsigned j
= 0; j
< weight
.size(); ++j
) {
4205 carg
->weight_set
[j
].weights
[i
] = weight
[j
];
4207 ldout(cct
, 5) << __func__
<< " set " << id
<< " to " << weight
4208 << " in bucket " << b
->id
<< dendl
;
4213 vector
<int> bucket_weight(weight
.size(), 0);
4214 for (unsigned i
= 0; i
< b
->size
; i
++) {
4215 for (unsigned j
= 0; j
< weight
.size(); ++j
) {
4216 bucket_weight
[j
] += carg
->weight_set
[j
].weights
[i
];
4219 choose_args_adjust_item_weight(cct
, cmap
, b
->id
, bucket_weight
, nullptr);
4224 int CrushWrapper::choose_args_adjust_item_weight(
4226 crush_choose_arg_map cmap
,
4228 const vector
<int>& weight
,
4231 ldout(cct
, 5) << __func__
<< " " << id
<< " weight " << weight
<< dendl
;
4233 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
4234 crush_bucket
*b
= crush
->buckets
[bidx
];
4238 changed
+= _choose_args_adjust_item_weight_in_bucket(
4239 cct
, cmap
, b
->id
, id
, weight
, ss
);
4243 *ss
<< "item " << id
<< " not found in crush map";