1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "osd/osd_types.h"
5 #include "common/debug.h"
6 #include "common/Formatter.h"
7 #include "common/errno.h"
8 #include "common/TextTable.h"
9 #include "include/stringify.h"
11 #include "CrushWrapper.h"
12 #include "CrushTreeDumper.h"
14 #define dout_subsys ceph_subsys_crush
16 bool CrushWrapper::has_legacy_rule_ids() const
18 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
19 crush_rule
*r
= crush
->rules
[i
];
21 r
->mask
.ruleset
!= i
) {
28 std::map
<int, int> CrushWrapper::renumber_rules()
30 std::map
<int, int> result
;
31 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
32 crush_rule
*r
= crush
->rules
[i
];
33 if (r
&& r
->mask
.ruleset
!= i
) {
34 result
[r
->mask
.ruleset
] = i
;
41 bool CrushWrapper::has_non_straw2_buckets() const
43 for (int i
=0; i
<crush
->max_buckets
; ++i
) {
44 crush_bucket
*b
= crush
->buckets
[i
];
47 if (b
->alg
!= CRUSH_BUCKET_STRAW2
)
53 bool CrushWrapper::has_v2_rules() const
55 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
63 bool CrushWrapper::is_v2_rule(unsigned ruleid
) const
65 // check rule for use of indep or new SET_* rule steps
66 if (ruleid
>= crush
->max_rules
)
68 crush_rule
*r
= crush
->rules
[ruleid
];
71 for (unsigned j
=0; j
<r
->len
; j
++) {
72 if (r
->steps
[j
].op
== CRUSH_RULE_CHOOSE_INDEP
||
73 r
->steps
[j
].op
== CRUSH_RULE_CHOOSELEAF_INDEP
||
74 r
->steps
[j
].op
== CRUSH_RULE_SET_CHOOSE_TRIES
||
75 r
->steps
[j
].op
== CRUSH_RULE_SET_CHOOSELEAF_TRIES
) {
82 bool CrushWrapper::has_v3_rules() const
84 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
92 bool CrushWrapper::is_v3_rule(unsigned ruleid
) const
94 // check rule for use of SET_CHOOSELEAF_VARY_R step
95 if (ruleid
>= crush
->max_rules
)
97 crush_rule
*r
= crush
->rules
[ruleid
];
100 for (unsigned j
=0; j
<r
->len
; j
++) {
101 if (r
->steps
[j
].op
== CRUSH_RULE_SET_CHOOSELEAF_VARY_R
) {
108 bool CrushWrapper::has_v4_buckets() const
110 for (int i
=0; i
<crush
->max_buckets
; ++i
) {
111 crush_bucket
*b
= crush
->buckets
[i
];
114 if (b
->alg
== CRUSH_BUCKET_STRAW2
)
120 bool CrushWrapper::has_v5_rules() const
122 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
130 bool CrushWrapper::is_v5_rule(unsigned ruleid
) const
132 // check rule for use of SET_CHOOSELEAF_STABLE step
133 if (ruleid
>= crush
->max_rules
)
135 crush_rule
*r
= crush
->rules
[ruleid
];
138 for (unsigned j
=0; j
<r
->len
; j
++) {
139 if (r
->steps
[j
].op
== CRUSH_RULE_SET_CHOOSELEAF_STABLE
) {
146 bool CrushWrapper::has_choose_args() const
148 return !choose_args
.empty();
151 bool CrushWrapper::has_incompat_choose_args() const
153 if (choose_args
.empty())
155 if (choose_args
.size() > 1)
157 if (choose_args
.begin()->first
!= DEFAULT_CHOOSE_ARGS
)
159 crush_choose_arg_map arg_map
= choose_args
.begin()->second
;
160 for (__u32 i
= 0; i
< arg_map
.size
; i
++) {
161 crush_choose_arg
*arg
= &arg_map
.args
[i
];
162 if (arg
->weight_set_size
== 0 &&
165 if (arg
->weight_set_size
!= 1)
167 if (arg
->ids_size
!= 0)
173 int CrushWrapper::split_id_class(int i
, int *idout
, int *classout
) const
177 string name
= get_item_name(i
);
178 size_t pos
= name
.find("~");
179 if (pos
== string::npos
) {
184 string name_no_class
= name
.substr(0, pos
);
185 if (!name_exists(name_no_class
))
187 string class_name
= name
.substr(pos
+ 1);
188 if (!class_exists(class_name
))
190 *idout
= get_item_id(name_no_class
);
191 *classout
= get_class_id(class_name
);
195 int CrushWrapper::can_rename_item(const string
& srcname
,
196 const string
& dstname
,
199 if (name_exists(srcname
)) {
200 if (name_exists(dstname
)) {
201 *ss
<< "dstname = '" << dstname
<< "' already exists";
204 if (is_valid_crush_name(dstname
)) {
207 *ss
<< "dstname = '" << dstname
<< "' does not match [-_.0-9a-zA-Z]+";
211 if (name_exists(dstname
)) {
212 *ss
<< "srcname = '" << srcname
<< "' does not exist "
213 << "and dstname = '" << dstname
<< "' already exists";
216 *ss
<< "srcname = '" << srcname
<< "' does not exist";
222 int CrushWrapper::rename_item(const string
& srcname
,
223 const string
& dstname
,
226 int ret
= can_rename_item(srcname
, dstname
, ss
);
229 int oldid
= get_item_id(srcname
);
230 return set_item_name(oldid
, dstname
);
233 int CrushWrapper::can_rename_bucket(const string
& srcname
,
234 const string
& dstname
,
237 int ret
= can_rename_item(srcname
, dstname
, ss
);
240 int srcid
= get_item_id(srcname
);
242 *ss
<< "srcname = '" << srcname
<< "' is not a bucket "
243 << "because its id = " << srcid
<< " is >= 0";
249 int CrushWrapper::rename_bucket(const string
& srcname
,
250 const string
& dstname
,
253 int ret
= can_rename_bucket(srcname
, dstname
, ss
);
256 int oldid
= get_item_id(srcname
);
257 return set_item_name(oldid
, dstname
);
260 int CrushWrapper::rename_rule(const string
& srcname
,
261 const string
& dstname
,
264 if (!rule_exists(srcname
)) {
266 *ss
<< "source rule name '" << srcname
<< "' does not exist";
270 if (rule_exists(dstname
)) {
272 *ss
<< "destination rule name '" << dstname
<< "' already exists";
276 int rule_id
= get_rule_id(srcname
);
277 auto it
= rule_name_map
.find(rule_id
);
278 assert(it
!= rule_name_map
.end());
279 it
->second
= dstname
;
281 rule_name_rmap
.erase(srcname
);
282 rule_name_rmap
[dstname
] = rule_id
;
287 void CrushWrapper::find_takes(set
<int> *roots
) const
289 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
290 crush_rule
*r
= crush
->rules
[i
];
293 for (unsigned j
=0; j
<r
->len
; j
++) {
294 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
)
295 roots
->insert(r
->steps
[j
].arg1
);
300 void CrushWrapper::find_roots(set
<int> *roots
) const
302 for (int i
= 0; i
< crush
->max_buckets
; i
++) {
303 if (!crush
->buckets
[i
])
305 crush_bucket
*b
= crush
->buckets
[i
];
306 if (!_search_item_exists(b
->id
))
307 roots
->insert(b
->id
);
311 bool CrushWrapper::subtree_contains(int root
, int item
) const
317 return false; // root is a leaf
319 const crush_bucket
*b
= get_bucket(root
);
323 for (unsigned j
=0; j
<b
->size
; j
++) {
324 if (subtree_contains(b
->items
[j
], item
))
330 bool CrushWrapper::_maybe_remove_last_instance(CephContext
*cct
, int item
, bool unlink_only
)
333 if (_search_item_exists(item
)) {
336 if (item
< 0 && _bucket_is_in_use(item
)) {
340 if (item
< 0 && !unlink_only
) {
341 crush_bucket
*t
= get_bucket(item
);
342 ldout(cct
, 5) << "_maybe_remove_last_instance removing bucket " << item
<< dendl
;
343 crush_remove_bucket(crush
, t
);
344 if (class_bucket
.count(item
) != 0)
345 class_bucket
.erase(item
);
346 class_remove_item(item
);
348 if ((item
>= 0 || !unlink_only
) && name_map
.count(item
)) {
349 ldout(cct
, 5) << "_maybe_remove_last_instance removing name for item " << item
<< dendl
;
350 name_map
.erase(item
);
352 if (item
>= 0 && !unlink_only
) {
353 class_remove_item(item
);
356 rebuild_roots_with_classes();
360 int CrushWrapper::remove_root(int item
)
362 crush_bucket
*b
= get_bucket(item
);
364 // should be idempotent
365 // e.g.: we use 'crush link' to link same host into
366 // different roots, which as a result can cause different
367 // shadow trees reference same hosts too. This means
368 // we may need to destory the same buckets(hosts, racks, etc.)
369 // multiple times during rebuilding all shadow trees.
373 for (unsigned n
= 0; n
< b
->size
; n
++) {
374 if (b
->items
[n
] >= 0)
376 int r
= remove_root(b
->items
[n
]);
381 crush_remove_bucket(crush
, b
);
382 if (name_map
.count(item
) != 0) {
383 name_map
.erase(item
);
386 if (class_bucket
.count(item
) != 0)
387 class_bucket
.erase(item
);
388 class_remove_item(item
);
392 int CrushWrapper::remove_item(CephContext
*cct
, int item
, bool unlink_only
)
394 ldout(cct
, 5) << "remove_item " << item
395 << (unlink_only
? " unlink_only":"") << dendl
;
399 if (item
< 0 && !unlink_only
) {
400 crush_bucket
*t
= get_bucket(item
);
402 ldout(cct
, 1) << "remove_item bucket " << item
<< " does not exist"
408 ldout(cct
, 1) << "remove_item bucket " << item
<< " has " << t
->size
409 << " items, not empty" << dendl
;
412 if (_bucket_is_in_use(item
)) {
417 for (int i
= 0; i
< crush
->max_buckets
; i
++) {
418 if (!crush
->buckets
[i
])
420 crush_bucket
*b
= crush
->buckets
[i
];
422 for (unsigned i
=0; i
<b
->size
; ++i
) {
423 int id
= b
->items
[i
];
425 ldout(cct
, 5) << "remove_item removing item " << item
426 << " from bucket " << b
->id
<< dendl
;
427 for (auto& p
: choose_args
) {
428 // weight down each weight-set to 0 before we remove the item
429 vector
<int> weightv(get_choose_args_positions(p
.second
), 0);
430 choose_args_adjust_item_weight(cct
, p
.second
, item
, weightv
, nullptr);
432 bucket_remove_item(b
, item
);
433 adjust_item_weight(cct
, b
->id
, b
->weight
);
439 if (_maybe_remove_last_instance(cct
, item
, unlink_only
))
445 bool CrushWrapper::_search_item_exists(int item
) const
447 for (int i
= 0; i
< crush
->max_buckets
; i
++) {
448 if (!crush
->buckets
[i
])
450 crush_bucket
*b
= crush
->buckets
[i
];
451 for (unsigned j
=0; j
<b
->size
; ++j
) {
452 if (b
->items
[j
] == item
)
459 bool CrushWrapper::_bucket_is_in_use(int item
)
461 for (auto &i
: class_bucket
)
462 for (auto &j
: i
.second
)
463 if (j
.second
== item
)
465 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
466 crush_rule
*r
= crush
->rules
[i
];
469 for (unsigned j
= 0; j
< r
->len
; ++j
) {
470 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
) {
471 int step_item
= r
->steps
[j
].arg1
;
474 int res
= split_id_class(step_item
, &original_item
, &c
);
477 if (step_item
== item
|| original_item
== item
)
485 int CrushWrapper::_remove_item_under(
486 CephContext
*cct
, int item
, int ancestor
, bool unlink_only
)
488 ldout(cct
, 5) << "_remove_item_under " << item
<< " under " << ancestor
489 << (unlink_only
? " unlink_only":"") << dendl
;
495 if (!bucket_exists(ancestor
))
500 crush_bucket
*b
= get_bucket(ancestor
);
501 for (unsigned i
=0; i
<b
->size
; ++i
) {
502 int id
= b
->items
[i
];
504 ldout(cct
, 5) << "_remove_item_under removing item " << item
505 << " from bucket " << b
->id
<< dendl
;
506 for (auto& p
: choose_args
) {
507 // weight down each weight-set to 0 before we remove the item
508 vector
<int> weightv(get_choose_args_positions(p
.second
), 0);
509 _choose_args_adjust_item_weight_in_bucket(
510 cct
, p
.second
, b
->id
, item
, weightv
, nullptr);
512 bucket_remove_item(b
, item
);
513 adjust_item_weight(cct
, b
->id
, b
->weight
);
516 int r
= remove_item_under(cct
, item
, id
, unlink_only
);
524 int CrushWrapper::remove_item_under(
525 CephContext
*cct
, int item
, int ancestor
, bool unlink_only
)
527 ldout(cct
, 5) << "remove_item_under " << item
<< " under " << ancestor
528 << (unlink_only
? " unlink_only":"") << dendl
;
530 if (!unlink_only
&& _bucket_is_in_use(item
)) {
534 int ret
= _remove_item_under(cct
, item
, ancestor
, unlink_only
);
538 if (item
< 0 && !unlink_only
) {
539 crush_bucket
*t
= get_bucket(item
);
541 ldout(cct
, 1) << "remove_item_under bucket " << item
542 << " does not exist" << dendl
;
547 ldout(cct
, 1) << "remove_item_under bucket " << item
<< " has " << t
->size
548 << " items, not empty" << dendl
;
553 if (_maybe_remove_last_instance(cct
, item
, unlink_only
))
559 int CrushWrapper::get_common_ancestor_distance(CephContext
*cct
, int id
,
560 const std::multimap
<string
,string
>& loc
)
562 ldout(cct
, 5) << __func__
<< " " << id
<< " " << loc
<< dendl
;
563 if (!item_exists(id
))
565 map
<string
,string
> id_loc
= get_full_location(id
);
566 ldout(cct
, 20) << " id is at " << id_loc
<< dendl
;
568 for (map
<int,string
>::const_iterator p
= type_map
.begin();
571 map
<string
,string
>::iterator ip
= id_loc
.find(p
->second
);
572 if (ip
== id_loc
.end())
574 for (std::multimap
<string
,string
>::const_iterator q
= loc
.find(p
->second
);
577 if (q
->first
!= p
->second
)
579 if (q
->second
== ip
->second
)
586 int CrushWrapper::parse_loc_map(const std::vector
<string
>& args
,
587 std::map
<string
,string
> *ploc
)
590 for (unsigned i
= 0; i
< args
.size(); ++i
) {
591 const char *s
= args
[i
].c_str();
592 const char *pos
= strchr(s
, '=');
595 string
key(s
, 0, pos
-s
);
598 (*ploc
)[key
] = value
;
605 int CrushWrapper::parse_loc_multimap(const std::vector
<string
>& args
,
606 std::multimap
<string
,string
> *ploc
)
609 for (unsigned i
= 0; i
< args
.size(); ++i
) {
610 const char *s
= args
[i
].c_str();
611 const char *pos
= strchr(s
, '=');
614 string
key(s
, 0, pos
-s
);
617 ploc
->insert(make_pair(key
, value
));
624 bool CrushWrapper::check_item_loc(CephContext
*cct
, int item
, const map
<string
,string
>& loc
,
627 ldout(cct
, 5) << "check_item_loc item " << item
<< " loc " << loc
<< dendl
;
629 for (map
<int,string
>::const_iterator p
= type_map
.begin(); p
!= type_map
.end(); ++p
) {
634 // ignore types that aren't specified in loc
635 map
<string
,string
>::const_iterator q
= loc
.find(p
->second
);
636 if (q
== loc
.end()) {
637 ldout(cct
, 2) << "warning: did not specify location for '" << p
->second
<< "' level (levels are "
638 << type_map
<< ")" << dendl
;
642 if (!name_exists(q
->second
)) {
643 ldout(cct
, 5) << "check_item_loc bucket " << q
->second
<< " dne" << dendl
;
647 int id
= get_item_id(q
->second
);
649 ldout(cct
, 5) << "check_item_loc requested " << q
->second
<< " for type " << p
->second
650 << " is a device, not bucket" << dendl
;
654 assert(bucket_exists(id
));
655 crush_bucket
*b
= get_bucket(id
);
657 // see if item exists in this bucket
658 for (unsigned j
=0; j
<b
->size
; j
++) {
659 if (b
->items
[j
] == item
) {
660 ldout(cct
, 2) << "check_item_loc " << item
<< " exists in bucket " << b
->id
<< dendl
;
662 *weight
= crush_get_bucket_item_weight(b
, j
);
669 ldout(cct
, 2) << __func__
<< " item " << item
<< " loc " << loc
<< dendl
;
673 map
<string
, string
> CrushWrapper::get_full_location(int id
)
675 vector
<pair
<string
, string
> > full_location_ordered
;
676 map
<string
,string
> full_location
;
678 get_full_location_ordered(id
, full_location_ordered
);
680 std::copy(full_location_ordered
.begin(),
681 full_location_ordered
.end(),
682 std::inserter(full_location
, full_location
.begin()));
684 return full_location
;
687 int CrushWrapper::get_full_location_ordered(int id
, vector
<pair
<string
, string
> >& path
)
689 if (!item_exists(id
))
694 pair
<string
, string
> parent_coord
= get_immediate_parent(cur
, &ret
);
697 path
.push_back(parent_coord
);
698 cur
= get_item_id(parent_coord
.second
);
703 string
CrushWrapper::get_full_location_ordered_string(int id
)
705 vector
<pair
<string
, string
> > full_location_ordered
;
706 string full_location
;
707 get_full_location_ordered(id
, full_location_ordered
);
708 reverse(begin(full_location_ordered
), end(full_location_ordered
));
709 for(auto i
= full_location_ordered
.begin(); i
!= full_location_ordered
.end(); i
++) {
710 full_location
= full_location
+ i
->first
+ "=" + i
->second
;
711 if (i
!= full_location_ordered
.end() - 1) {
712 full_location
= full_location
+ ",";
715 return full_location
;
718 map
<int, string
> CrushWrapper::get_parent_hierarchy(int id
)
720 map
<int,string
> parent_hierarchy
;
721 pair
<string
, string
> parent_coord
= get_immediate_parent(id
);
724 // get the integer type for id and create a counter from there
725 int type_counter
= get_bucket_type(id
);
727 // if we get a negative type then we can assume that we have an OSD
728 // change behavior in get_item_type FIXME
729 if (type_counter
< 0)
732 // read the type map and get the name of the type with the largest ID
734 for (map
<int, string
>::iterator it
= type_map
.begin(); it
!= type_map
.end(); ++it
){
735 if ( (*it
).first
> high_type
)
736 high_type
= (*it
).first
;
739 parent_id
= get_item_id(parent_coord
.second
);
741 while (type_counter
< high_type
) {
743 parent_hierarchy
[ type_counter
] = parent_coord
.first
;
745 if (type_counter
< high_type
){
746 // get the coordinate information for the next parent
747 parent_coord
= get_immediate_parent(parent_id
);
748 parent_id
= get_item_id(parent_coord
.second
);
752 return parent_hierarchy
;
755 int CrushWrapper::get_children(int id
, list
<int> *children
)
762 crush_bucket
*b
= get_bucket(id
);
767 for (unsigned n
=0; n
<b
->size
; n
++) {
768 children
->push_back(b
->items
[n
]);
773 int CrushWrapper::get_rule_failure_domain(int rule_id
)
775 crush_rule
*rule
= get_rule(rule_id
);
779 int type
= 0; // default to osd-level
780 for (unsigned s
= 0; s
< rule
->len
; ++s
) {
781 if ((rule
->steps
[s
].op
== CRUSH_RULE_CHOOSE_FIRSTN
||
782 rule
->steps
[s
].op
== CRUSH_RULE_CHOOSE_INDEP
||
783 rule
->steps
[s
].op
== CRUSH_RULE_CHOOSELEAF_FIRSTN
||
784 rule
->steps
[s
].op
== CRUSH_RULE_CHOOSELEAF_INDEP
) &&
785 rule
->steps
[s
].arg2
> type
) {
786 type
= rule
->steps
[s
].arg2
;
792 int CrushWrapper::_get_leaves(int id
, list
<int> *leaves
)
798 leaves
->push_back(id
);
802 crush_bucket
*b
= get_bucket(id
);
807 for (unsigned n
= 0; n
< b
->size
; n
++) {
808 if (b
->items
[n
] >= 0) {
809 leaves
->push_back(b
->items
[n
]);
811 // is a bucket, do recursive call
812 int r
= _get_leaves(b
->items
[n
], leaves
);
819 return 0; // all is well
822 int CrushWrapper::get_leaves(const string
&name
, set
<int> *leaves
)
827 if (!name_exists(name
)) {
831 int id
= get_item_id(name
);
839 int r
= _get_leaves(id
, &unordered
);
844 for (auto &p
: unordered
) {
851 int CrushWrapper::insert_item(
852 CephContext
*cct
, int item
, float weight
, string name
,
853 const map
<string
,string
>& loc
) // typename -> bucketname
855 ldout(cct
, 5) << "insert_item item " << item
<< " weight " << weight
856 << " name " << name
<< " loc " << loc
<< dendl
;
858 if (!is_valid_crush_name(name
))
861 if (!is_valid_crush_loc(cct
, loc
))
864 int r
= validate_weightf(weight
);
869 if (name_exists(name
)) {
870 if (get_item_id(name
) != item
) {
871 ldout(cct
, 10) << "device name '" << name
<< "' already exists as id "
872 << get_item_id(name
) << dendl
;
876 set_item_name(item
, name
);
881 // create locations if locations don't exist and add child in
882 // location with 0 weight the more detail in the insert_item method
883 // declaration in CrushWrapper.h
884 for (auto p
= type_map
.begin(); p
!= type_map
.end(); ++p
) {
885 // ignore device type
889 // skip types that are unspecified
890 map
<string
,string
>::const_iterator q
= loc
.find(p
->second
);
891 if (q
== loc
.end()) {
892 ldout(cct
, 2) << "warning: did not specify location for '"
893 << p
->second
<< "' level (levels are "
894 << type_map
<< ")" << dendl
;
898 if (!name_exists(q
->second
)) {
899 ldout(cct
, 5) << "insert_item creating bucket " << q
->second
<< dendl
;
900 int empty
= 0, newid
;
901 int r
= add_bucket(0, 0,
902 CRUSH_HASH_DEFAULT
, p
->first
, 1, &cur
, &empty
, &newid
);
904 ldout(cct
, 1) << "add_bucket failure error: " << cpp_strerror(r
)
908 set_item_name(newid
, q
->second
);
914 // add to an existing bucket
915 int id
= get_item_id(q
->second
);
916 if (!bucket_exists(id
)) {
917 ldout(cct
, 1) << "insert_item doesn't have bucket " << id
<< dendl
;
921 // check that we aren't creating a cycle.
922 if (subtree_contains(id
, cur
)) {
923 ldout(cct
, 1) << "insert_item item " << cur
<< " already exists beneath "
928 // we have done sanity check above
929 crush_bucket
*b
= get_bucket(id
);
931 if (p
->first
!= b
->type
) {
932 ldout(cct
, 1) << "insert_item existing bucket has type "
933 << "'" << type_map
[b
->type
] << "' != "
934 << "'" << type_map
[p
->first
] << "'" << dendl
;
938 // are we forming a loop?
939 if (subtree_contains(cur
, b
->id
)) {
940 ldout(cct
, 1) << "insert_item " << cur
<< " already contains " << b
->id
941 << "; cannot form loop" << dendl
;
945 ldout(cct
, 5) << "insert_item adding " << cur
<< " weight " << weight
946 << " to bucket " << id
<< dendl
;
947 int r
= bucket_add_item(b
, cur
, 0);
952 // adjust the item's weight in location
953 if (adjust_item_weightf_in_loc(cct
, item
, weight
, loc
) > 0) {
954 if (item
>= crush
->max_devices
) {
955 crush
->max_devices
= item
+ 1;
956 ldout(cct
, 5) << "insert_item max_devices now " << crush
->max_devices
959 r
= rebuild_roots_with_classes();
961 ldout(cct
, 0) << __func__
<< " unable to rebuild roots with classes: "
962 << cpp_strerror(r
) << dendl
;
968 ldout(cct
, 1) << "error: didn't find anywhere to add item " << item
969 << " in " << loc
<< dendl
;
974 int CrushWrapper::move_bucket(
975 CephContext
*cct
, int id
, const map
<string
,string
>& loc
)
977 // sorry this only works for buckets
981 if (!item_exists(id
))
984 // get the name of the bucket we are trying to move for later
985 string id_name
= get_item_name(id
);
988 int bucket_weight
= detach_bucket(cct
, id
);
990 // insert the bucket back into the hierarchy
991 return insert_item(cct
, id
, bucket_weight
/ (float)0x10000, id_name
, loc
);
994 int CrushWrapper::detach_bucket(CephContext
*cct
, int item
)
1002 // check that the bucket that we want to detach exists
1003 assert(bucket_exists(item
));
1005 // get the bucket's weight
1006 crush_bucket
*b
= get_bucket(item
);
1007 unsigned bucket_weight
= b
->weight
;
1009 // get where the bucket is located
1010 pair
<string
, string
> bucket_location
= get_immediate_parent(item
);
1012 // get the id of the parent bucket
1013 int parent_id
= get_item_id(bucket_location
.second
);
1015 // get the parent bucket
1016 crush_bucket
*parent_bucket
= get_bucket(parent_id
);
1018 if (!IS_ERR(parent_bucket
)) {
1019 // zero out the bucket weight
1020 bucket_adjust_item_weight(cct
, parent_bucket
, item
, 0);
1021 adjust_item_weight(cct
, parent_bucket
->id
, parent_bucket
->weight
);
1022 for (auto& p
: choose_args
) {
1023 // weight down each weight-set to 0 before we remove the item
1024 vector
<int> weightv(get_choose_args_positions(p
.second
), 0);
1025 choose_args_adjust_item_weight(cct
, p
.second
, item
, weightv
, nullptr);
1028 // remove the bucket from the parent
1029 bucket_remove_item(parent_bucket
, item
);
1030 } else if (PTR_ERR(parent_bucket
) != -ENOENT
) {
1031 return PTR_ERR(parent_bucket
);
1034 // check that we're happy
1035 int test_weight
= 0;
1036 map
<string
,string
> test_location
;
1037 test_location
[ bucket_location
.first
] = (bucket_location
.second
);
1039 bool successful_detach
= !(check_item_loc(cct
, item
, test_location
,
1041 assert(successful_detach
);
1042 assert(test_weight
== 0);
1044 return bucket_weight
;
1047 int CrushWrapper::swap_bucket(CephContext
*cct
, int src
, int dst
)
1049 if (src
>= 0 || dst
>= 0)
1051 if (!item_exists(src
) || !item_exists(dst
))
1053 crush_bucket
*a
= get_bucket(src
);
1054 crush_bucket
*b
= get_bucket(dst
);
1055 unsigned aw
= a
->weight
;
1056 unsigned bw
= b
->weight
;
1059 adjust_item_weight(cct
, a
->id
, bw
);
1060 adjust_item_weight(cct
, b
->id
, aw
);
1063 map
<int,unsigned> tmp
;
1064 unsigned as
= a
->size
;
1065 unsigned bs
= b
->size
;
1066 for (unsigned i
= 0; i
< as
; ++i
) {
1067 int item
= a
->items
[0];
1068 int itemw
= crush_get_bucket_item_weight(a
, 0);
1070 bucket_remove_item(a
, item
);
1072 assert(a
->size
== 0);
1073 assert(b
->size
== bs
);
1074 for (unsigned i
= 0; i
< bs
; ++i
) {
1075 int item
= b
->items
[0];
1076 int itemw
= crush_get_bucket_item_weight(b
, 0);
1077 bucket_remove_item(b
, item
);
1078 bucket_add_item(a
, item
, itemw
);
1080 assert(a
->size
== bs
);
1081 assert(b
->size
== 0);
1082 for (auto t
: tmp
) {
1083 bucket_add_item(b
, t
.first
, t
.second
);
1085 assert(a
->size
== bs
);
1086 assert(b
->size
== as
);
1089 swap_names(src
, dst
);
1090 return rebuild_roots_with_classes();
1093 int CrushWrapper::link_bucket(
1094 CephContext
*cct
, int id
, const map
<string
,string
>& loc
)
1096 // sorry this only works for buckets
1100 if (!item_exists(id
))
1103 // get the name of the bucket we are trying to move for later
1104 string id_name
= get_item_name(id
);
1106 crush_bucket
*b
= get_bucket(id
);
1107 unsigned bucket_weight
= b
->weight
;
1109 return insert_item(cct
, id
, bucket_weight
/ (float)0x10000, id_name
, loc
);
1112 int CrushWrapper::create_or_move_item(
1113 CephContext
*cct
, int item
, float weight
, string name
,
1114 const map
<string
,string
>& loc
) // typename -> bucketname
1119 if (!is_valid_crush_name(name
))
1122 if (check_item_loc(cct
, item
, loc
, &old_iweight
)) {
1123 ldout(cct
, 5) << "create_or_move_item " << item
<< " already at " << loc
1126 if (_search_item_exists(item
)) {
1127 weight
= get_item_weightf(item
);
1128 ldout(cct
, 10) << "create_or_move_item " << item
1129 << " exists with weight " << weight
<< dendl
;
1130 remove_item(cct
, item
, true);
1132 ldout(cct
, 5) << "create_or_move_item adding " << item
1133 << " weight " << weight
1134 << " at " << loc
<< dendl
;
1135 ret
= insert_item(cct
, item
, weight
, name
, loc
);
1142 int CrushWrapper::update_item(
1143 CephContext
*cct
, int item
, float weight
, string name
,
1144 const map
<string
,string
>& loc
) // typename -> bucketname
1146 ldout(cct
, 5) << "update_item item " << item
<< " weight " << weight
1147 << " name " << name
<< " loc " << loc
<< dendl
;
1150 if (!is_valid_crush_name(name
))
1153 if (!is_valid_crush_loc(cct
, loc
))
1156 ret
= validate_weightf(weight
);
1161 // compare quantized (fixed-point integer) weights!
1162 int iweight
= (int)(weight
* (float)0x10000);
1164 if (check_item_loc(cct
, item
, loc
, &old_iweight
)) {
1165 ldout(cct
, 5) << "update_item " << item
<< " already at " << loc
<< dendl
;
1166 if (old_iweight
!= iweight
) {
1167 ldout(cct
, 5) << "update_item " << item
<< " adjusting weight "
1168 << ((float)old_iweight
/(float)0x10000) << " -> " << weight
1170 adjust_item_weight_in_loc(cct
, item
, iweight
, loc
);
1173 if (get_item_name(item
) != name
) {
1174 ldout(cct
, 5) << "update_item setting " << item
<< " name to " << name
1176 set_item_name(item
, name
);
1180 if (item_exists(item
)) {
1181 remove_item(cct
, item
, true);
1183 ldout(cct
, 5) << "update_item adding " << item
<< " weight " << weight
1184 << " at " << loc
<< dendl
;
1185 ret
= insert_item(cct
, item
, weight
, name
, loc
);
1192 int CrushWrapper::get_item_weight(int id
) const
1194 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1195 crush_bucket
*b
= crush
->buckets
[bidx
];
1200 for (unsigned i
= 0; i
< b
->size
; i
++)
1201 if (b
->items
[i
] == id
)
1202 return crush_get_bucket_item_weight(b
, i
);
1207 int CrushWrapper::get_item_weight_in_loc(int id
, const map
<string
,string
> &loc
)
1209 for (map
<string
,string
>::const_iterator l
= loc
.begin(); l
!= loc
.end(); ++l
) {
1211 int bid
= get_item_id(l
->second
);
1212 if (!bucket_exists(bid
))
1214 crush_bucket
*b
= get_bucket(bid
);
1215 for (unsigned int i
= 0; i
< b
->size
; i
++) {
1216 if (b
->items
[i
] == id
) {
1217 return crush_get_bucket_item_weight(b
, i
);
1224 int CrushWrapper::adjust_item_weight(CephContext
*cct
, int id
, int weight
)
1226 ldout(cct
, 5) << "adjust_item_weight " << id
<< " weight " << weight
<< dendl
;
1228 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1229 crush_bucket
*b
= crush
->buckets
[bidx
];
1232 for (unsigned i
= 0; i
< b
->size
; i
++) {
1233 if (b
->items
[i
] == id
) {
1234 int diff
= bucket_adjust_item_weight(cct
, b
, id
, weight
);
1235 ldout(cct
, 5) << "adjust_item_weight " << id
<< " diff " << diff
1236 << " in bucket " << bidx
<< dendl
;
1237 adjust_item_weight(cct
, -1 - bidx
, b
->weight
);
1247 int CrushWrapper::adjust_item_weight_in_loc(CephContext
*cct
, int id
, int weight
, const map
<string
,string
>& loc
)
1249 ldout(cct
, 5) << "adjust_item_weight_in_loc " << id
<< " weight " << weight
1250 << " in " << loc
<< dendl
;
1253 for (auto l
= loc
.begin(); l
!= loc
.end(); ++l
) {
1254 int bid
= get_item_id(l
->second
);
1255 if (!bucket_exists(bid
))
1257 crush_bucket
*b
= get_bucket(bid
);
1258 for (unsigned int i
= 0; i
< b
->size
; i
++) {
1259 if (b
->items
[i
] == id
) {
1260 int diff
= bucket_adjust_item_weight(cct
, b
, id
, weight
);
1261 ldout(cct
, 5) << "adjust_item_weight_in_loc " << id
<< " diff " << diff
1262 << " in bucket " << bid
<< dendl
;
1263 adjust_item_weight(cct
, bid
, b
->weight
);
1273 int CrushWrapper::adjust_subtree_weight(CephContext
*cct
, int id
, int weight
)
1275 ldout(cct
, 5) << __func__
<< " " << id
<< " weight " << weight
<< dendl
;
1276 crush_bucket
*b
= get_bucket(id
);
1280 list
<crush_bucket
*> q
;
1282 while (!q
.empty()) {
1285 int local_changed
= 0;
1286 for (unsigned i
=0; i
<b
->size
; ++i
) {
1287 int n
= b
->items
[i
];
1289 bucket_adjust_item_weight(cct
, b
, n
, weight
);
1293 crush_bucket
*sub
= get_bucket(n
);
1299 if (local_changed
) {
1300 adjust_item_weight(cct
, b
->id
, b
->weight
);
1306 bool CrushWrapper::check_item_present(int id
) const
1310 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1311 crush_bucket
*b
= crush
->buckets
[bidx
];
1314 for (unsigned i
= 0; i
< b
->size
; i
++)
1315 if (b
->items
[i
] == id
)
1322 pair
<string
,string
> CrushWrapper::get_immediate_parent(int id
, int *_ret
)
1325 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1326 crush_bucket
*b
= crush
->buckets
[bidx
];
1329 if (is_shadow_item(b
->id
))
1331 for (unsigned i
= 0; i
< b
->size
; i
++)
1332 if (b
->items
[i
] == id
) {
1333 string parent_id
= name_map
[b
->id
];
1334 string parent_bucket_type
= type_map
[b
->type
];
1337 return make_pair(parent_bucket_type
, parent_id
);
1344 return pair
<string
, string
>();
1347 int CrushWrapper::get_immediate_parent_id(int id
, int *parent
) const
1349 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
1350 crush_bucket
*b
= crush
->buckets
[bidx
];
1353 if (is_shadow_item(b
->id
))
1355 for (unsigned i
= 0; i
< b
->size
; i
++) {
1356 if (b
->items
[i
] == id
) {
1365 int CrushWrapper::get_parent_of_type(int item
, int type
) const
1368 int r
= get_immediate_parent_id(item
, &item
);
1372 } while (get_bucket_type(item
) != type
);
1376 int CrushWrapper::rename_class(const string
& srcname
, const string
& dstname
)
1378 auto i
= class_rname
.find(srcname
);
1379 if (i
== class_rname
.end())
1381 auto j
= class_rname
.find(dstname
);
1382 if (j
!= class_rname
.end())
1385 int class_id
= i
->second
;
1386 assert(class_name
.count(class_id
));
1387 // rename any shadow buckets of old class name
1388 for (auto &it
: class_map
) {
1389 if (it
.first
< 0 && it
.second
== class_id
) {
1390 string old_name
= get_item_name(it
.first
);
1391 size_t pos
= old_name
.find("~");
1392 assert(pos
!= string::npos
);
1393 string name_no_class
= old_name
.substr(0, pos
);
1394 string old_class_name
= old_name
.substr(pos
+ 1);
1395 assert(old_class_name
== srcname
);
1396 string new_name
= name_no_class
+ "~" + dstname
;
1397 // we do not use set_item_name
1398 // because the name is intentionally invalid
1399 name_map
[it
.first
] = new_name
;
1405 class_rname
.erase(srcname
);
1406 class_name
.erase(class_id
);
1407 class_rname
[dstname
] = class_id
;
1408 class_name
[class_id
] = dstname
;
1412 int CrushWrapper::populate_classes(
1413 const std::map
<int32_t, map
<int32_t, int32_t>>& old_class_bucket
)
1415 // build set of previous used shadow ids
1416 set
<int32_t> used_ids
;
1417 for (auto& p
: old_class_bucket
) {
1418 for (auto& q
: p
.second
) {
1419 used_ids
.insert(q
.second
);
1422 // accumulate weight values for each carg and bucket as we go. because it is
1423 // depth first, we will have the nested bucket weights we need when we
1424 // finish constructing the containing buckets.
1425 map
<int,map
<int,vector
<int>>> cmap_item_weight
; // cargs -> bno -> weights
1427 find_nonshadow_roots(&roots
);
1428 for (auto &r
: roots
) {
1431 for (auto &c
: class_name
) {
1433 int res
= device_class_clone(r
, c
.first
, old_class_bucket
, used_ids
,
1434 &clone
, &cmap_item_weight
);
1442 int CrushWrapper::trim_roots_with_class()
1445 find_shadow_roots(&roots
);
1446 for (auto &r
: roots
) {
1449 int res
= remove_root(r
);
1453 // there is no need to reweight because we only remove from the
1458 int32_t CrushWrapper::_alloc_class_id() const {
1459 if (class_name
.empty()) {
1462 int32_t class_id
= class_name
.rbegin()->first
+ 1;
1463 if (class_id
>= 0) {
1466 // wrapped, pick a random start and do exhaustive search
1467 uint32_t upperlimit
= numeric_limits
<int32_t>::max();
1469 class_id
= rand() % upperlimit
;
1470 const auto start
= class_id
;
1472 if (!class_name
.count(class_id
)) {
1480 } while (class_id
!= start
);
1481 assert(0 == "no available class id");
1484 void CrushWrapper::reweight(CephContext
*cct
)
1488 for (set
<int>::iterator p
= roots
.begin(); p
!= roots
.end(); ++p
) {
1491 crush_bucket
*b
= get_bucket(*p
);
1492 ldout(cct
, 5) << "reweight bucket " << *p
<< dendl
;
1493 int r
= crush_reweight_bucket(crush
, b
);
1498 int CrushWrapper::add_simple_rule_at(
1499 string name
, string root_name
,
1500 string failure_domain_name
,
1501 string device_class
,
1502 string mode
, int rule_type
,
1506 if (rule_exists(name
)) {
1508 *err
<< "rule " << name
<< " exists";
1512 if (rule_exists(rno
)) {
1514 *err
<< "rule with ruleno " << rno
<< " exists";
1517 if (ruleset_exists(rno
)) {
1519 *err
<< "ruleset " << rno
<< " exists";
1523 for (rno
= 0; rno
< get_max_rules(); rno
++) {
1524 if (!rule_exists(rno
) && !ruleset_exists(rno
))
1528 if (!name_exists(root_name
)) {
1530 *err
<< "root item " << root_name
<< " does not exist";
1533 int root
= get_item_id(root_name
);
1535 if (failure_domain_name
.length()) {
1536 type
= get_type_id(failure_domain_name
);
1539 *err
<< "unknown type " << failure_domain_name
;
1543 if (device_class
.size()) {
1544 if (!class_exists(device_class
)) {
1546 *err
<< "device class " << device_class
<< " does not exist";
1549 int c
= get_class_id(device_class
);
1550 if (class_bucket
.count(root
) == 0 ||
1551 class_bucket
[root
].count(c
) == 0) {
1553 *err
<< "root " << root_name
<< " has no devices with class "
1557 root
= class_bucket
[root
][c
];
1559 if (mode
!= "firstn" && mode
!= "indep") {
1561 *err
<< "unknown mode " << mode
;
1566 if (mode
== "indep")
1568 int min_rep
= mode
== "firstn" ? 1 : 3;
1569 int max_rep
= mode
== "firstn" ? 10 : 20;
1570 //set the ruleset the same as rule_id(rno)
1571 crush_rule
*rule
= crush_make_rule(steps
, rno
, rule_type
, min_rep
, max_rep
);
1574 if (mode
== "indep") {
1575 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSELEAF_TRIES
, 5, 0);
1576 crush_rule_set_step(rule
, step
++, CRUSH_RULE_SET_CHOOSE_TRIES
, 100, 0);
1578 crush_rule_set_step(rule
, step
++, CRUSH_RULE_TAKE
, root
, 0);
1580 crush_rule_set_step(rule
, step
++,
1581 mode
== "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN
:
1582 CRUSH_RULE_CHOOSELEAF_INDEP
,
1586 crush_rule_set_step(rule
, step
++,
1587 mode
== "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN
:
1588 CRUSH_RULE_CHOOSE_INDEP
,
1591 crush_rule_set_step(rule
, step
++, CRUSH_RULE_EMIT
, 0, 0);
1593 int ret
= crush_add_rule(crush
, rule
, rno
);
1595 *err
<< "failed to add rule " << rno
<< " because " << cpp_strerror(ret
);
1598 set_rule_name(rno
, name
);
1603 int CrushWrapper::add_simple_rule(
1604 string name
, string root_name
,
1605 string failure_domain_name
,
1606 string device_class
,
1607 string mode
, int rule_type
,
1610 return add_simple_rule_at(name
, root_name
, failure_domain_name
, device_class
,
1612 rule_type
, -1, err
);
1615 float CrushWrapper::_get_take_weight_osd_map(int root
,
1616 map
<int,float> *pmap
) const
1621 //breadth first iterate the OSD tree
1622 while (!q
.empty()) {
1623 int bno
= q
.front();
1625 crush_bucket
*b
= crush
->buckets
[-1-bno
];
1627 for (unsigned j
=0; j
<b
->size
; ++j
) {
1628 int item_id
= b
->items
[j
];
1629 if (item_id
>= 0) { //it's an OSD
1630 float w
= crush_get_bucket_item_weight(b
, j
);
1631 (*pmap
)[item_id
] = w
;
1633 } else { //not an OSD, expand the child later
1634 q
.push_back(item_id
);
1641 void CrushWrapper::_normalize_weight_map(float sum
,
1642 const map
<int,float>& m
,
1643 map
<int,float> *pmap
) const
1646 map
<int,float>::iterator q
= pmap
->find(p
.first
);
1647 if (q
== pmap
->end()) {
1648 (*pmap
)[p
.first
] = p
.second
/ sum
;
1650 q
->second
+= p
.second
/ sum
;
1655 int CrushWrapper::get_take_weight_osd_map(int root
, map
<int,float> *pmap
) const
1658 float sum
= _get_take_weight_osd_map(root
, &m
);
1659 _normalize_weight_map(sum
, m
, pmap
);
1663 int CrushWrapper::get_rule_weight_osd_map(unsigned ruleno
,
1664 map
<int,float> *pmap
) const
1666 if (ruleno
>= crush
->max_rules
)
1668 if (crush
->rules
[ruleno
] == NULL
)
1670 crush_rule
*rule
= crush
->rules
[ruleno
];
1672 // build a weight map for each TAKE in the rule, and then merge them
1674 // FIXME: if there are multiple takes that place a different number of
1675 // objects we do not take that into account. (Also, note that doing this
1676 // right is also a function of the pool, since the crush rule
1677 // might choose 2 + choose 2 but pool size may only be 3.)
1678 for (unsigned i
=0; i
<rule
->len
; ++i
) {
1681 if (rule
->steps
[i
].op
== CRUSH_RULE_TAKE
) {
1682 int n
= rule
->steps
[i
].arg1
;
1687 sum
+= _get_take_weight_osd_map(n
, &m
);
1690 _normalize_weight_map(sum
, m
, pmap
);
1696 int CrushWrapper::remove_rule(int ruleno
)
1698 if (ruleno
>= (int)crush
->max_rules
)
1700 if (crush
->rules
[ruleno
] == NULL
)
1702 crush_destroy_rule(crush
->rules
[ruleno
]);
1703 crush
->rules
[ruleno
] = NULL
;
1704 rule_name_map
.erase(ruleno
);
1706 return rebuild_roots_with_classes();
1709 int CrushWrapper::bucket_adjust_item_weight(CephContext
*cct
, crush_bucket
*bucket
, int item
, int weight
)
1711 if (cct
->_conf
->osd_crush_update_weight_set
) {
1713 for (position
= 0; position
< bucket
->size
; position
++)
1714 if (bucket
->items
[position
] == item
)
1716 assert(position
!= bucket
->size
);
1717 for (auto &w
: choose_args
) {
1718 crush_choose_arg_map
&arg_map
= w
.second
;
1719 crush_choose_arg
*arg
= &arg_map
.args
[-1-bucket
->id
];
1720 for (__u32 j
= 0; j
< arg
->weight_set_size
; j
++) {
1721 crush_weight_set
*weight_set
= &arg
->weight_set
[j
];
1722 weight_set
->weights
[position
] = weight
;
1726 return crush_bucket_adjust_item_weight(crush
, bucket
, item
, weight
);
1729 int CrushWrapper::add_bucket(
1730 int bucketno
, int alg
, int hash
, int type
, int size
,
1731 int *items
, int *weights
, int *idout
)
1734 alg
= get_default_bucket_alg();
1738 crush_bucket
*b
= crush_make_bucket(crush
, alg
, hash
, type
, size
, items
,
1742 int r
= crush_add_bucket(crush
, bucketno
, b
, idout
);
1743 int pos
= -1 - *idout
;
1744 for (auto& p
: choose_args
) {
1745 crush_choose_arg_map
& cmap
= p
.second
;
1747 if ((int)cmap
.size
<= pos
) {
1748 cmap
.args
= (crush_choose_arg
*)realloc(
1750 sizeof(crush_choose_arg
) * (pos
+ 1));
1752 memset(&cmap
.args
[cmap
.size
], 0,
1753 sizeof(crush_choose_arg
) * (pos
+ 1 - cmap
.size
));
1754 cmap
.size
= pos
+ 1;
1757 cmap
.args
= (crush_choose_arg
*)calloc(sizeof(crush_choose_arg
),
1760 cmap
.size
= pos
+ 1;
1763 int positions
= get_choose_args_positions(cmap
);
1764 crush_choose_arg
& carg
= cmap
.args
[pos
];
1765 carg
.weight_set
= (crush_weight_set
*)calloc(sizeof(crush_weight_set
),
1767 carg
.weight_set_size
= positions
;
1768 for (int ppos
= 0; ppos
< positions
; ++ppos
) {
1769 carg
.weight_set
[ppos
].weights
= (__u32
*)calloc(sizeof(__u32
), size
);
1770 carg
.weight_set
[ppos
].size
= size
;
1771 for (int bpos
= 0; bpos
< size
; ++bpos
) {
1772 carg
.weight_set
[ppos
].weights
[bpos
] = weights
[bpos
];
1780 int CrushWrapper::bucket_add_item(crush_bucket
*bucket
, int item
, int weight
)
1782 __u32 new_size
= bucket
->size
+ 1;
1783 int r
= crush_bucket_add_item(crush
, bucket
, item
, weight
);
1787 for (auto &w
: choose_args
) {
1788 crush_choose_arg_map
&arg_map
= w
.second
;
1789 crush_choose_arg
*arg
= &arg_map
.args
[-1-bucket
->id
];
1790 for (__u32 j
= 0; j
< arg
->weight_set_size
; j
++) {
1791 crush_weight_set
*weight_set
= &arg
->weight_set
[j
];
1792 weight_set
->weights
= (__u32
*)realloc(weight_set
->weights
,
1793 new_size
* sizeof(__u32
));
1794 assert(weight_set
->size
+ 1 == new_size
);
1795 weight_set
->weights
[weight_set
->size
] = weight
;
1796 weight_set
->size
= new_size
;
1798 if (arg
->ids_size
) {
1799 arg
->ids
= (__s32
*)realloc(arg
->ids
, new_size
* sizeof(__s32
));
1800 assert(arg
->ids_size
+ 1 == new_size
);
1801 arg
->ids
[arg
->ids_size
] = item
;
1802 arg
->ids_size
= new_size
;
1808 int CrushWrapper::bucket_remove_item(crush_bucket
*bucket
, int item
)
1810 __u32 new_size
= bucket
->size
- 1;
1812 for (position
= 0; position
< bucket
->size
; position
++)
1813 if (bucket
->items
[position
] == item
)
1815 assert(position
!= bucket
->size
);
1816 int r
= crush_bucket_remove_item(crush
, bucket
, item
);
1820 for (auto &w
: choose_args
) {
1821 crush_choose_arg_map
&arg_map
= w
.second
;
1822 crush_choose_arg
*arg
= &arg_map
.args
[-1-bucket
->id
];
1823 for (__u32 j
= 0; j
< arg
->weight_set_size
; j
++) {
1824 crush_weight_set
*weight_set
= &arg
->weight_set
[j
];
1825 assert(weight_set
->size
- 1 == new_size
);
1826 for (__u32 k
= position
; k
< new_size
; k
++)
1827 weight_set
->weights
[k
] = weight_set
->weights
[k
+1];
1829 weight_set
->weights
= (__u32
*)realloc(weight_set
->weights
,
1830 new_size
* sizeof(__u32
));
1832 weight_set
->weights
= NULL
;
1834 weight_set
->size
= new_size
;
1836 if (arg
->ids_size
) {
1837 assert(arg
->ids_size
- 1 == new_size
);
1838 for (__u32 k
= position
; k
< new_size
; k
++)
1839 arg
->ids
[k
] = arg
->ids
[k
+1];
1841 arg
->ids
= (__s32
*)realloc(arg
->ids
, new_size
* sizeof(__s32
));
1845 arg
->ids_size
= new_size
;
1851 int CrushWrapper::bucket_set_alg(int bid
, int alg
)
1853 crush_bucket
*b
= get_bucket(bid
);
1861 int CrushWrapper::update_device_class(int id
,
1862 const string
& class_name
,
1866 assert(item_exists(id
));
1867 auto old_class_name
= get_item_class(id
);
1868 if (old_class_name
&& old_class_name
!= class_name
) {
1869 *ss
<< "osd." << id
<< " has already bound to class '" << old_class_name
1870 << "', can not reset class to '" << class_name
<< "'; "
1871 << "use 'ceph osd crush rm-device-class <osd>' to "
1872 << "remove old class first";
1876 int class_id
= get_or_create_class_id(class_name
);
1878 *ss
<< name
<< " id " << id
<< " is negative";
1882 if (class_map
.count(id
) != 0 && class_map
[id
] == class_id
) {
1883 *ss
<< name
<< " already set to class " << class_name
;
1887 set_item_class(id
, class_id
);
1889 int r
= rebuild_roots_with_classes();
1895 int CrushWrapper::remove_device_class(CephContext
*cct
, int id
, ostream
*ss
)
1898 const char *name
= get_item_name(id
);
1900 *ss
<< "osd." << id
<< " does not have a name";
1904 const char *class_name
= get_item_class(id
);
1906 *ss
<< "osd." << id
<< " has not been bound to a specific class yet";
1909 class_remove_item(id
);
1911 int r
= rebuild_roots_with_classes();
1913 *ss
<< "unable to rebuild roots with class '" << class_name
<< "' "
1914 << "of osd." << id
<< ": " << cpp_strerror(r
);
1920 int CrushWrapper::device_class_clone(
1921 int original_id
, int device_class
,
1922 const std::map
<int32_t, map
<int32_t, int32_t>>& old_class_bucket
,
1923 const std::set
<int32_t>& used_ids
,
1925 map
<int,map
<int,vector
<int>>> *cmap_item_weight
)
1927 const char *item_name
= get_item_name(original_id
);
1928 if (item_name
== NULL
)
1930 const char *class_name
= get_class_name(device_class
);
1931 if (class_name
== NULL
)
1933 string copy_name
= item_name
+ string("~") + class_name
;
1934 if (name_exists(copy_name
)) {
1935 *clone
= get_item_id(copy_name
);
1939 crush_bucket
*original
= get_bucket(original_id
);
1940 assert(!IS_ERR(original
));
1941 crush_bucket
*copy
= crush_make_bucket(crush
,
1948 vector
<unsigned> item_orig_pos
; // new item pos -> orig item pos
1949 for (unsigned i
= 0; i
< original
->size
; i
++) {
1950 int item
= original
->items
[i
];
1951 int weight
= crush_get_bucket_item_weight(original
, i
);
1953 if (class_map
.count(item
) != 0 && class_map
[item
] == device_class
) {
1954 int res
= crush_bucket_add_item(crush
, copy
, item
, weight
);
1962 int res
= device_class_clone(item
, device_class
, old_class_bucket
,
1963 used_ids
, &child_copy_id
,
1967 crush_bucket
*child_copy
= get_bucket(child_copy_id
);
1968 assert(!IS_ERR(child_copy
));
1969 res
= crush_bucket_add_item(crush
, copy
, child_copy_id
,
1970 child_copy
->weight
);
1974 item_orig_pos
.push_back(i
);
1976 assert(item_orig_pos
.size() == copy
->size
);
1979 if (old_class_bucket
.count(original_id
) &&
1980 old_class_bucket
.at(original_id
).count(device_class
)) {
1981 bno
= old_class_bucket
.at(original_id
).at(device_class
);
1983 // pick a new shadow bucket id that is not used by the current map
1984 // *or* any previous shadow buckets.
1986 while (((-1-bno
) < crush
->max_buckets
&& crush
->buckets
[-1-bno
]) ||
1987 used_ids
.count(bno
)) {
1991 int res
= crush_add_bucket(crush
, bno
, copy
, clone
);
1994 assert(!bno
|| bno
== *clone
);
1996 res
= set_item_class(*clone
, device_class
);
2000 // we do not use set_item_name because the name is intentionally invalid
2001 name_map
[*clone
] = copy_name
;
2003 name_rmap
[copy_name
] = *clone
;
2004 class_bucket
[original_id
][device_class
] = *clone
;
2006 // set up choose_args for the new bucket.
2007 for (auto& w
: choose_args
) {
2008 crush_choose_arg_map
& cmap
= w
.second
;
2009 if (-1-bno
>= (int)cmap
.size
) {
2010 unsigned new_size
= -1-bno
+ 1;
2011 cmap
.args
= (crush_choose_arg
*)realloc(cmap
.args
,
2012 new_size
* sizeof(cmap
.args
[0]));
2014 memset(cmap
.args
+ cmap
.size
, 0,
2015 (new_size
- cmap
.size
) * sizeof(cmap
.args
[0]));
2016 cmap
.size
= new_size
;
2018 auto& o
= cmap
.args
[-1-original_id
];
2019 auto& n
= cmap
.args
[-1-bno
];
2020 n
.ids_size
= 0; // FIXME: implement me someday
2021 n
.weight_set_size
= o
.weight_set_size
;
2022 n
.weight_set
= (crush_weight_set
*)calloc(
2023 n
.weight_set_size
, sizeof(crush_weight_set
));
2024 for (size_t s
= 0; s
< n
.weight_set_size
; ++s
) {
2025 n
.weight_set
[s
].size
= copy
->size
;
2026 n
.weight_set
[s
].weights
= (__u32
*)calloc(copy
->size
, sizeof(__u32
));
2028 for (size_t s
= 0; s
< n
.weight_set_size
; ++s
) {
2029 vector
<int> bucket_weights(n
.weight_set_size
);
2030 for (size_t i
= 0; i
< copy
->size
; ++i
) {
2031 int item
= copy
->items
[i
];
2033 n
.weight_set
[s
].weights
[i
] = o
.weight_set
[s
].weights
[item_orig_pos
[i
]];
2035 n
.weight_set
[s
].weights
[i
] = (*cmap_item_weight
)[w
.first
][item
][s
];
2037 bucket_weights
[s
] += n
.weight_set
[s
].weights
[i
];
2039 (*cmap_item_weight
)[w
.first
][bno
] = bucket_weights
;
2045 int CrushWrapper::get_rules_by_class(const string
&class_name
, set
<int> *rules
)
2049 if (!class_exists(class_name
)) {
2052 int class_id
= get_class_id(class_name
);
2053 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
2054 crush_rule
*r
= crush
->rules
[i
];
2057 for (unsigned j
= 0; j
< r
->len
; ++j
) {
2058 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
) {
2059 int step_item
= r
->steps
[j
].arg1
;
2062 int res
= split_id_class(step_item
, &original_item
, &c
);
2066 if (c
!= -1 && c
== class_id
) {
2076 // return rules that might reference the given osd
2077 int CrushWrapper::get_rules_by_osd(int osd
, set
<int> *rules
)
2084 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
2085 crush_rule
*r
= crush
->rules
[i
];
2088 for (unsigned j
= 0; j
< r
->len
; ++j
) {
2089 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
) {
2090 int step_item
= r
->steps
[j
].arg1
;
2091 list
<int> unordered
;
2092 int rc
= _get_leaves(step_item
, &unordered
);
2094 return rc
; // propagate fatal errors!
2097 for (auto &o
: unordered
) {
2114 bool CrushWrapper::_class_is_dead(int class_id
)
2116 for (auto &p
: class_map
) {
2117 if (p
.first
>= 0 && p
.second
== class_id
) {
2121 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
2122 crush_rule
*r
= crush
->rules
[i
];
2125 for (unsigned j
= 0; j
< r
->len
; ++j
) {
2126 if (r
->steps
[j
].op
== CRUSH_RULE_TAKE
) {
2127 int root
= r
->steps
[j
].arg1
;
2128 for (auto &p
: class_bucket
) {
2130 if (q
.count(class_id
) && q
[class_id
] == root
) {
2137 // no more referenced by any devices or crush rules
2141 void CrushWrapper::cleanup_dead_classes()
2143 auto p
= class_name
.begin();
2144 while (p
!= class_name
.end()) {
2145 if (_class_is_dead(p
->first
)) {
2146 string n
= p
->second
;
2148 remove_class_name(n
);
2155 int CrushWrapper::rebuild_roots_with_classes()
2157 std::map
<int32_t, map
<int32_t, int32_t> > old_class_bucket
= class_bucket
;
2158 cleanup_dead_classes();
2159 int r
= trim_roots_with_class();
2162 class_bucket
.clear();
2163 return populate_classes(old_class_bucket
);
2166 void CrushWrapper::encode(bufferlist
& bl
, uint64_t features
) const
2170 __u32 magic
= CRUSH_MAGIC
;
2171 ::encode(magic
, bl
);
2173 ::encode(crush
->max_buckets
, bl
);
2174 ::encode(crush
->max_rules
, bl
);
2175 ::encode(crush
->max_devices
, bl
);
2177 bool encode_compat_choose_args
= false;
2178 crush_choose_arg_map arg_map
;
2179 memset(&arg_map
, '\0', sizeof(arg_map
));
2180 if (has_choose_args() &&
2181 !HAVE_FEATURE(features
, CRUSH_CHOOSE_ARGS
)) {
2182 assert(!has_incompat_choose_args());
2183 encode_compat_choose_args
= true;
2184 arg_map
= choose_args
.begin()->second
;
2188 for (int i
=0; i
<crush
->max_buckets
; i
++) {
2190 if (crush
->buckets
[i
]) alg
= crush
->buckets
[i
]->alg
;
2195 ::encode(crush
->buckets
[i
]->id
, bl
);
2196 ::encode(crush
->buckets
[i
]->type
, bl
);
2197 ::encode(crush
->buckets
[i
]->alg
, bl
);
2198 ::encode(crush
->buckets
[i
]->hash
, bl
);
2199 ::encode(crush
->buckets
[i
]->weight
, bl
);
2200 ::encode(crush
->buckets
[i
]->size
, bl
);
2201 for (unsigned j
=0; j
<crush
->buckets
[i
]->size
; j
++)
2202 ::encode(crush
->buckets
[i
]->items
[j
], bl
);
2204 switch (crush
->buckets
[i
]->alg
) {
2205 case CRUSH_BUCKET_UNIFORM
:
2206 ::encode((reinterpret_cast<crush_bucket_uniform
*>(crush
->buckets
[i
]))->item_weight
, bl
);
2209 case CRUSH_BUCKET_LIST
:
2210 for (unsigned j
=0; j
<crush
->buckets
[i
]->size
; j
++) {
2211 ::encode((reinterpret_cast<crush_bucket_list
*>(crush
->buckets
[i
]))->item_weights
[j
], bl
);
2212 ::encode((reinterpret_cast<crush_bucket_list
*>(crush
->buckets
[i
]))->sum_weights
[j
], bl
);
2216 case CRUSH_BUCKET_TREE
:
2217 ::encode((reinterpret_cast<crush_bucket_tree
*>(crush
->buckets
[i
]))->num_nodes
, bl
);
2218 for (unsigned j
=0; j
<(reinterpret_cast<crush_bucket_tree
*>(crush
->buckets
[i
]))->num_nodes
; j
++)
2219 ::encode((reinterpret_cast<crush_bucket_tree
*>(crush
->buckets
[i
]))->node_weights
[j
], bl
);
2222 case CRUSH_BUCKET_STRAW
:
2223 for (unsigned j
=0; j
<crush
->buckets
[i
]->size
; j
++) {
2224 ::encode((reinterpret_cast<crush_bucket_straw
*>(crush
->buckets
[i
]))->item_weights
[j
], bl
);
2225 ::encode((reinterpret_cast<crush_bucket_straw
*>(crush
->buckets
[i
]))->straws
[j
], bl
);
2229 case CRUSH_BUCKET_STRAW2
:
2232 if (encode_compat_choose_args
&&
2233 arg_map
.args
[i
].weight_set_size
> 0) {
2234 weights
= arg_map
.args
[i
].weight_set
[0].weights
;
2236 weights
= (reinterpret_cast<crush_bucket_straw2
*>(crush
->buckets
[i
]))->item_weights
;
2238 for (unsigned j
=0; j
<crush
->buckets
[i
]->size
; j
++) {
2239 ::encode(weights
[j
], bl
);
2251 for (unsigned i
=0; i
<crush
->max_rules
; i
++) {
2252 __u32 yes
= crush
->rules
[i
] ? 1:0;
2257 ::encode(crush
->rules
[i
]->len
, bl
);
2258 ::encode(crush
->rules
[i
]->mask
, bl
);
2259 for (unsigned j
=0; j
<crush
->rules
[i
]->len
; j
++)
2260 ::encode(crush
->rules
[i
]->steps
[j
], bl
);
2264 ::encode(type_map
, bl
);
2265 ::encode(name_map
, bl
);
2266 ::encode(rule_name_map
, bl
);
2269 ::encode(crush
->choose_local_tries
, bl
);
2270 ::encode(crush
->choose_local_fallback_tries
, bl
);
2271 ::encode(crush
->choose_total_tries
, bl
);
2272 ::encode(crush
->chooseleaf_descend_once
, bl
);
2273 ::encode(crush
->chooseleaf_vary_r
, bl
);
2274 ::encode(crush
->straw_calc_version
, bl
);
2275 ::encode(crush
->allowed_bucket_algs
, bl
);
2276 if (features
& CEPH_FEATURE_CRUSH_TUNABLES5
) {
2277 ::encode(crush
->chooseleaf_stable
, bl
);
2280 if (HAVE_FEATURE(features
, SERVER_LUMINOUS
)) {
2282 ::encode(class_map
, bl
);
2283 ::encode(class_name
, bl
);
2284 ::encode(class_bucket
, bl
);
2287 __u32 size
= (__u32
)choose_args
.size();
2289 for (auto c
: choose_args
) {
2290 ::encode(c
.first
, bl
);
2291 crush_choose_arg_map arg_map
= c
.second
;
2293 for (__u32 i
= 0; i
< arg_map
.size
; i
++) {
2294 crush_choose_arg
*arg
= &arg_map
.args
[i
];
2295 if (arg
->weight_set_size
== 0 &&
2301 for (__u32 i
= 0; i
< arg_map
.size
; i
++) {
2302 crush_choose_arg
*arg
= &arg_map
.args
[i
];
2303 if (arg
->weight_set_size
== 0 &&
2307 ::encode(arg
->weight_set_size
, bl
);
2308 for (__u32 j
= 0; j
< arg
->weight_set_size
; j
++) {
2309 crush_weight_set
*weight_set
= &arg
->weight_set
[j
];
2310 ::encode(weight_set
->size
, bl
);
2311 for (__u32 k
= 0; k
< weight_set
->size
; k
++)
2312 ::encode(weight_set
->weights
[k
], bl
);
2314 ::encode(arg
->ids_size
, bl
);
2315 for (__u32 j
= 0; j
< arg
->ids_size
; j
++)
2316 ::encode(arg
->ids
[j
], bl
);
2322 static void decode_32_or_64_string_map(map
<int32_t,string
>& m
, bufferlist::iterator
& blp
)
2332 ::decode(strlen
, blp
);
2334 // der, key was actually 64-bits!
2335 ::decode(strlen
, blp
);
2337 ::decode_nohead(strlen
, m
[key
], blp
);
2341 void CrushWrapper::decode(bufferlist::iterator
& blp
)
2346 ::decode(magic
, blp
);
2347 if (magic
!= CRUSH_MAGIC
)
2348 throw buffer::malformed_input("bad magic number");
2350 ::decode(crush
->max_buckets
, blp
);
2351 ::decode(crush
->max_rules
, blp
);
2352 ::decode(crush
->max_devices
, blp
);
2354 // legacy tunables, unless we decode something newer
2355 set_tunables_legacy();
2359 crush
->buckets
= (crush_bucket
**)calloc(1, crush
->max_buckets
* sizeof(crush_bucket
*));
2360 for (int i
=0; i
<crush
->max_buckets
; i
++) {
2361 decode_crush_bucket(&crush
->buckets
[i
], blp
);
2365 crush
->rules
= (crush_rule
**)calloc(1, crush
->max_rules
* sizeof(crush_rule
*));
2366 for (unsigned i
= 0; i
< crush
->max_rules
; ++i
) {
2370 crush
->rules
[i
] = NULL
;
2376 crush
->rules
[i
] = reinterpret_cast<crush_rule
*>(calloc(1, crush_rule_size(len
)));
2377 crush
->rules
[i
]->len
= len
;
2378 ::decode(crush
->rules
[i
]->mask
, blp
);
2379 for (unsigned j
=0; j
<crush
->rules
[i
]->len
; j
++)
2380 ::decode(crush
->rules
[i
]->steps
[j
], blp
);
2384 // NOTE: we had a bug where we were incoding int instead of int32, which means the
2385 // 'key' field for these maps may be either 32 or 64 bits, depending. tolerate
2386 // both by assuming the string is always non-empty.
2387 decode_32_or_64_string_map(type_map
, blp
);
2388 decode_32_or_64_string_map(name_map
, blp
);
2389 decode_32_or_64_string_map(rule_name_map
, blp
);
2393 ::decode(crush
->choose_local_tries
, blp
);
2394 ::decode(crush
->choose_local_fallback_tries
, blp
);
2395 ::decode(crush
->choose_total_tries
, blp
);
2398 ::decode(crush
->chooseleaf_descend_once
, blp
);
2401 ::decode(crush
->chooseleaf_vary_r
, blp
);
2404 ::decode(crush
->straw_calc_version
, blp
);
2407 ::decode(crush
->allowed_bucket_algs
, blp
);
2410 ::decode(crush
->chooseleaf_stable
, blp
);
2413 ::decode(class_map
, blp
);
2414 ::decode(class_name
, blp
);
2415 for (auto &c
: class_name
)
2416 class_rname
[c
.second
] = c
.first
;
2417 ::decode(class_bucket
, blp
);
2420 __u32 choose_args_size
;
2421 ::decode(choose_args_size
, blp
);
2422 for (__u32 i
= 0; i
< choose_args_size
; i
++) {
2423 typename
decltype(choose_args
)::key_type choose_args_index
;
2424 ::decode(choose_args_index
, blp
);
2425 crush_choose_arg_map arg_map
;
2426 arg_map
.size
= crush
->max_buckets
;
2427 arg_map
.args
= (crush_choose_arg
*)calloc(
2428 arg_map
.size
, sizeof(crush_choose_arg
));
2430 ::decode(size
, blp
);
2431 for (__u32 j
= 0; j
< size
; j
++) {
2433 ::decode(bucket_index
, blp
);
2434 assert(bucket_index
< arg_map
.size
);
2435 crush_choose_arg
*arg
= &arg_map
.args
[bucket_index
];
2436 ::decode(arg
->weight_set_size
, blp
);
2437 if (arg
->weight_set_size
) {
2438 arg
->weight_set
= (crush_weight_set
*)calloc(
2439 arg
->weight_set_size
, sizeof(crush_weight_set
));
2440 for (__u32 k
= 0; k
< arg
->weight_set_size
; k
++) {
2441 crush_weight_set
*weight_set
= &arg
->weight_set
[k
];
2442 ::decode(weight_set
->size
, blp
);
2443 weight_set
->weights
= (__u32
*)calloc(
2444 weight_set
->size
, sizeof(__u32
));
2445 for (__u32 l
= 0; l
< weight_set
->size
; l
++)
2446 ::decode(weight_set
->weights
[l
], blp
);
2449 ::decode(arg
->ids_size
, blp
);
2450 if (arg
->ids_size
) {
2451 assert(arg
->ids_size
== crush
->buckets
[bucket_index
]->size
);
2452 arg
->ids
= (__s32
*)calloc(arg
->ids_size
, sizeof(__s32
));
2453 for (__u32 k
= 0; k
< arg
->ids_size
; k
++)
2454 ::decode(arg
->ids
[k
], blp
);
2457 choose_args
[choose_args_index
] = arg_map
;
2463 crush_destroy(crush
);
2468 void CrushWrapper::decode_crush_bucket(crush_bucket
** bptr
, bufferlist::iterator
&blp
)
2479 case CRUSH_BUCKET_UNIFORM
:
2480 size
= sizeof(crush_bucket_uniform
);
2482 case CRUSH_BUCKET_LIST
:
2483 size
= sizeof(crush_bucket_list
);
2485 case CRUSH_BUCKET_TREE
:
2486 size
= sizeof(crush_bucket_tree
);
2488 case CRUSH_BUCKET_STRAW
:
2489 size
= sizeof(crush_bucket_straw
);
2491 case CRUSH_BUCKET_STRAW2
:
2492 size
= sizeof(crush_bucket_straw2
);
2497 snprintf(str
, sizeof(str
), "unsupported bucket algorithm: %d", alg
);
2498 throw buffer::malformed_input(str
);
2501 crush_bucket
*bucket
= reinterpret_cast<crush_bucket
*>(calloc(1, size
));
2504 ::decode(bucket
->id
, blp
);
2505 ::decode(bucket
->type
, blp
);
2506 ::decode(bucket
->alg
, blp
);
2507 ::decode(bucket
->hash
, blp
);
2508 ::decode(bucket
->weight
, blp
);
2509 ::decode(bucket
->size
, blp
);
2511 bucket
->items
= (__s32
*)calloc(1, bucket
->size
* sizeof(__s32
));
2512 for (unsigned j
= 0; j
< bucket
->size
; ++j
) {
2513 ::decode(bucket
->items
[j
], blp
);
2516 switch (bucket
->alg
) {
2517 case CRUSH_BUCKET_UNIFORM
:
2518 ::decode((reinterpret_cast<crush_bucket_uniform
*>(bucket
))->item_weight
, blp
);
2521 case CRUSH_BUCKET_LIST
: {
2522 crush_bucket_list
* cbl
= reinterpret_cast<crush_bucket_list
*>(bucket
);
2523 cbl
->item_weights
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
2524 cbl
->sum_weights
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
2526 for (unsigned j
= 0; j
< bucket
->size
; ++j
) {
2527 ::decode(cbl
->item_weights
[j
], blp
);
2528 ::decode(cbl
->sum_weights
[j
], blp
);
2533 case CRUSH_BUCKET_TREE
: {
2534 crush_bucket_tree
* cbt
= reinterpret_cast<crush_bucket_tree
*>(bucket
);
2535 ::decode(cbt
->num_nodes
, blp
);
2536 cbt
->node_weights
= (__u32
*)calloc(1, cbt
->num_nodes
* sizeof(__u32
));
2537 for (unsigned j
=0; j
<cbt
->num_nodes
; j
++) {
2538 ::decode(cbt
->node_weights
[j
], blp
);
2543 case CRUSH_BUCKET_STRAW
: {
2544 crush_bucket_straw
* cbs
= reinterpret_cast<crush_bucket_straw
*>(bucket
);
2545 cbs
->straws
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
2546 cbs
->item_weights
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
2547 for (unsigned j
= 0; j
< bucket
->size
; ++j
) {
2548 ::decode(cbs
->item_weights
[j
], blp
);
2549 ::decode(cbs
->straws
[j
], blp
);
2554 case CRUSH_BUCKET_STRAW2
: {
2555 crush_bucket_straw2
* cbs
= reinterpret_cast<crush_bucket_straw2
*>(bucket
);
2556 cbs
->item_weights
= (__u32
*)calloc(1, bucket
->size
* sizeof(__u32
));
2557 for (unsigned j
= 0; j
< bucket
->size
; ++j
) {
2558 ::decode(cbs
->item_weights
[j
], blp
);
2564 // We should have handled this case in the first switch statement
2571 void CrushWrapper::dump(Formatter
*f
) const
2573 f
->open_array_section("devices");
2574 for (int i
=0; i
<get_max_devices(); i
++) {
2575 f
->open_object_section("device");
2576 f
->dump_int("id", i
);
2577 const char *n
= get_item_name(i
);
2579 f
->dump_string("name", n
);
2582 sprintf(name
, "device%d", i
);
2583 f
->dump_string("name", name
);
2585 const char *device_class
= get_item_class(i
);
2586 if (device_class
!= NULL
)
2587 f
->dump_string("class", device_class
);
2592 f
->open_array_section("types");
2593 int n
= get_num_type_names();
2594 for (int i
=0; n
; i
++) {
2595 const char *name
= get_type_name(i
);
2598 f
->open_object_section("type");
2599 f
->dump_int("type_id", 0);
2600 f
->dump_string("name", "device");
2606 f
->open_object_section("type");
2607 f
->dump_int("type_id", i
);
2608 f
->dump_string("name", name
);
2613 f
->open_array_section("buckets");
2614 for (int bucket
= -1; bucket
> -1-get_max_buckets(); --bucket
) {
2615 if (!bucket_exists(bucket
))
2617 f
->open_object_section("bucket");
2618 f
->dump_int("id", bucket
);
2619 if (get_item_name(bucket
))
2620 f
->dump_string("name", get_item_name(bucket
));
2621 f
->dump_int("type_id", get_bucket_type(bucket
));
2622 if (get_type_name(get_bucket_type(bucket
)))
2623 f
->dump_string("type_name", get_type_name(get_bucket_type(bucket
)));
2624 f
->dump_int("weight", get_bucket_weight(bucket
));
2625 f
->dump_string("alg", crush_bucket_alg_name(get_bucket_alg(bucket
)));
2626 f
->dump_string("hash", crush_hash_name(get_bucket_hash(bucket
)));
2627 f
->open_array_section("items");
2628 for (int j
=0; j
<get_bucket_size(bucket
); j
++) {
2629 f
->open_object_section("item");
2630 f
->dump_int("id", get_bucket_item(bucket
, j
));
2631 f
->dump_int("weight", get_bucket_item_weight(bucket
, j
));
2632 f
->dump_int("pos", j
);
2640 f
->open_array_section("rules");
2644 f
->open_object_section("tunables");
2648 dump_choose_args(f
);
2652 // depth first walker
2654 typedef CrushTreeDumper::Item Item
;
2655 const CrushWrapper
*crush
;
2656 const CrushTreeDumper::name_map_t
& weight_set_names
;
2658 explicit TreeDumper(const CrushWrapper
*crush
,
2659 const CrushTreeDumper::name_map_t
& wsnames
)
2660 : crush(crush
), weight_set_names(wsnames
) {}
2662 void dump(Formatter
*f
) {
2664 crush
->find_roots(&roots
);
2665 for (set
<int>::iterator root
= roots
.begin(); root
!= roots
.end(); ++root
) {
2666 dump_item(Item(*root
, 0, 0, crush
->get_bucket_weightf(*root
)), f
);
2671 void dump_item(const Item
& qi
, Formatter
* f
) {
2672 if (qi
.is_bucket()) {
2673 f
->open_object_section("bucket");
2674 CrushTreeDumper::dump_item_fields(crush
, weight_set_names
, qi
, f
);
2675 dump_bucket_children(qi
, f
);
2678 f
->open_object_section("device");
2679 CrushTreeDumper::dump_item_fields(crush
, weight_set_names
, qi
, f
);
2684 void dump_bucket_children(const Item
& parent
, Formatter
* f
) {
2685 f
->open_array_section("items");
2686 const int max_pos
= crush
->get_bucket_size(parent
.id
);
2687 for (int pos
= 0; pos
< max_pos
; pos
++) {
2688 int id
= crush
->get_bucket_item(parent
.id
, pos
);
2689 float weight
= crush
->get_bucket_item_weightf(parent
.id
, pos
);
2690 dump_item(Item(id
, parent
.id
, parent
.depth
+ 1, weight
), f
);
2697 void CrushWrapper::dump_tree(
2699 const CrushTreeDumper::name_map_t
& weight_set_names
) const
2702 TreeDumper(this, weight_set_names
).dump(f
);
2705 void CrushWrapper::dump_tunables(Formatter
*f
) const
2707 f
->dump_int("choose_local_tries", get_choose_local_tries());
2708 f
->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries());
2709 f
->dump_int("choose_total_tries", get_choose_total_tries());
2710 f
->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
2711 f
->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
2712 f
->dump_int("chooseleaf_stable", get_chooseleaf_stable());
2713 f
->dump_int("straw_calc_version", get_straw_calc_version());
2714 f
->dump_int("allowed_bucket_algs", get_allowed_bucket_algs());
2716 // be helpful about it
2717 if (has_jewel_tunables())
2718 f
->dump_string("profile", "jewel");
2719 else if (has_hammer_tunables())
2720 f
->dump_string("profile", "hammer");
2721 else if (has_firefly_tunables())
2722 f
->dump_string("profile", "firefly");
2723 else if (has_bobtail_tunables())
2724 f
->dump_string("profile", "bobtail");
2725 else if (has_argonaut_tunables())
2726 f
->dump_string("profile", "argonaut");
2728 f
->dump_string("profile", "unknown");
2729 f
->dump_int("optimal_tunables", (int)has_optimal_tunables());
2730 f
->dump_int("legacy_tunables", (int)has_legacy_tunables());
2732 // be helpful about minimum version required
2733 f
->dump_string("minimum_required_version", get_min_required_version());
2735 f
->dump_int("require_feature_tunables", (int)has_nondefault_tunables());
2736 f
->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2());
2737 f
->dump_int("has_v2_rules", (int)has_v2_rules());
2738 f
->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3());
2739 f
->dump_int("has_v3_rules", (int)has_v3_rules());
2740 f
->dump_int("has_v4_buckets", (int)has_v4_buckets());
2741 f
->dump_int("require_feature_tunables5", (int)has_nondefault_tunables5());
2742 f
->dump_int("has_v5_rules", (int)has_v5_rules());
2745 void CrushWrapper::dump_choose_args(Formatter
*f
) const
2747 f
->open_object_section("choose_args");
2748 for (auto c
: choose_args
) {
2749 crush_choose_arg_map arg_map
= c
.second
;
2750 f
->open_array_section(stringify(c
.first
).c_str());
2751 for (__u32 i
= 0; i
< arg_map
.size
; i
++) {
2752 crush_choose_arg
*arg
= &arg_map
.args
[i
];
2753 if (arg
->weight_set_size
== 0 &&
2756 f
->open_object_section("choose_args");
2757 int bucket_index
= i
;
2758 f
->dump_int("bucket_id", -1-bucket_index
);
2759 if (arg
->weight_set_size
> 0) {
2760 f
->open_array_section("weight_set");
2761 for (__u32 j
= 0; j
< arg
->weight_set_size
; j
++) {
2762 f
->open_array_section("weights");
2763 __u32
*weights
= arg
->weight_set
[j
].weights
;
2764 __u32 size
= arg
->weight_set
[j
].size
;
2765 for (__u32 k
= 0; k
< size
; k
++) {
2766 f
->dump_float("weight", (float)weights
[k
]/(float)0x10000);
2772 if (arg
->ids_size
> 0) {
2773 f
->open_array_section("ids");
2774 for (__u32 j
= 0; j
< arg
->ids_size
; j
++)
2775 f
->dump_int("id", arg
->ids
[j
]);
2785 void CrushWrapper::dump_rules(Formatter
*f
) const
2787 for (int i
=0; i
<get_max_rules(); i
++) {
2788 if (!rule_exists(i
))
2794 void CrushWrapper::dump_rule(int ruleset
, Formatter
*f
) const
2796 f
->open_object_section("rule");
2797 f
->dump_int("rule_id", ruleset
);
2798 if (get_rule_name(ruleset
))
2799 f
->dump_string("rule_name", get_rule_name(ruleset
));
2800 f
->dump_int("ruleset", get_rule_mask_ruleset(ruleset
));
2801 f
->dump_int("type", get_rule_mask_type(ruleset
));
2802 f
->dump_int("min_size", get_rule_mask_min_size(ruleset
));
2803 f
->dump_int("max_size", get_rule_mask_max_size(ruleset
));
2804 f
->open_array_section("steps");
2805 for (int j
=0; j
<get_rule_len(ruleset
); j
++) {
2806 f
->open_object_section("step");
2807 switch (get_rule_op(ruleset
, j
)) {
2808 case CRUSH_RULE_NOOP
:
2809 f
->dump_string("op", "noop");
2811 case CRUSH_RULE_TAKE
:
2812 f
->dump_string("op", "take");
2814 int item
= get_rule_arg1(ruleset
, j
);
2815 f
->dump_int("item", item
);
2817 const char *name
= get_item_name(item
);
2818 f
->dump_string("item_name", name
? name
: "");
2821 case CRUSH_RULE_EMIT
:
2822 f
->dump_string("op", "emit");
2824 case CRUSH_RULE_CHOOSE_FIRSTN
:
2825 f
->dump_string("op", "choose_firstn");
2826 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
2827 f
->dump_string("type", get_type_name(get_rule_arg2(ruleset
, j
)));
2829 case CRUSH_RULE_CHOOSE_INDEP
:
2830 f
->dump_string("op", "choose_indep");
2831 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
2832 f
->dump_string("type", get_type_name(get_rule_arg2(ruleset
, j
)));
2834 case CRUSH_RULE_CHOOSELEAF_FIRSTN
:
2835 f
->dump_string("op", "chooseleaf_firstn");
2836 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
2837 f
->dump_string("type", get_type_name(get_rule_arg2(ruleset
, j
)));
2839 case CRUSH_RULE_CHOOSELEAF_INDEP
:
2840 f
->dump_string("op", "chooseleaf_indep");
2841 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
2842 f
->dump_string("type", get_type_name(get_rule_arg2(ruleset
, j
)));
2844 case CRUSH_RULE_SET_CHOOSE_TRIES
:
2845 f
->dump_string("op", "set_choose_tries");
2846 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
2848 case CRUSH_RULE_SET_CHOOSELEAF_TRIES
:
2849 f
->dump_string("op", "set_chooseleaf_tries");
2850 f
->dump_int("num", get_rule_arg1(ruleset
, j
));
2853 f
->dump_int("opcode", get_rule_op(ruleset
, j
));
2854 f
->dump_int("arg1", get_rule_arg1(ruleset
, j
));
2855 f
->dump_int("arg2", get_rule_arg2(ruleset
, j
));
2863 void CrushWrapper::list_rules(Formatter
*f
) const
2865 for (int rule
= 0; rule
< get_max_rules(); rule
++) {
2866 if (!rule_exists(rule
))
2868 f
->dump_string("name", get_rule_name(rule
));
2872 void CrushWrapper::list_rules(ostream
*ss
) const
2874 for (int rule
= 0; rule
< get_max_rules(); rule
++) {
2875 if (!rule_exists(rule
))
2877 *ss
<< get_rule_name(rule
) << "\n";
2881 class CrushTreePlainDumper
: public CrushTreeDumper::Dumper
<TextTable
> {
2883 typedef CrushTreeDumper::Dumper
<TextTable
> Parent
;
2885 explicit CrushTreePlainDumper(const CrushWrapper
*crush
,
2886 const CrushTreeDumper::name_map_t
& wsnames
)
2887 : Parent(crush
, wsnames
) {}
2888 explicit CrushTreePlainDumper(const CrushWrapper
*crush
,
2889 const CrushTreeDumper::name_map_t
& wsnames
,
2891 : Parent(crush
, wsnames
, show_shadow
) {}
2894 void dump(TextTable
*tbl
) {
2895 tbl
->define_column("ID", TextTable::LEFT
, TextTable::RIGHT
);
2896 tbl
->define_column("CLASS", TextTable::LEFT
, TextTable::RIGHT
);
2897 tbl
->define_column("WEIGHT", TextTable::LEFT
, TextTable::RIGHT
);
2898 for (auto& p
: crush
->choose_args
) {
2899 if (p
.first
== CrushWrapper::DEFAULT_CHOOSE_ARGS
) {
2900 tbl
->define_column("(compat)", TextTable::LEFT
, TextTable::RIGHT
);
2903 auto q
= weight_set_names
.find(p
.first
);
2904 name
= q
!= weight_set_names
.end() ? q
->second
:
2906 tbl
->define_column(name
.c_str(), TextTable::LEFT
, TextTable::RIGHT
);
2909 tbl
->define_column("TYPE NAME", TextTable::LEFT
, TextTable::LEFT
);
2914 void dump_item(const CrushTreeDumper::Item
&qi
, TextTable
*tbl
) override
{
2915 const char *c
= crush
->get_item_class(qi
.id
);
2920 << weightf_t(qi
.weight
);
2921 for (auto& p
: crush
->choose_args
) {
2922 if (qi
.parent
< 0) {
2923 const crush_choose_arg_map cmap
= crush
->choose_args_get(p
.first
);
2924 int bidx
= -1 - qi
.parent
;
2925 const crush_bucket
*b
= crush
->get_bucket(qi
.parent
);
2927 bidx
< (int)cmap
.size
&&
2928 cmap
.args
[bidx
].weight_set
&&
2929 cmap
.args
[bidx
].weight_set_size
>= 1) {
2932 pos
< (int)cmap
.args
[bidx
].weight_set
[0].size
&&
2933 b
->items
[pos
] != qi
.id
;
2935 *tbl
<< weightf_t((float)cmap
.args
[bidx
].weight_set
[0].weights
[pos
] /
2943 for (int k
=0; k
< qi
.depth
; k
++) {
2946 if (qi
.is_bucket()) {
2947 ss
<< crush
->get_type_name(crush
->get_bucket_type(qi
.id
)) << " "
2948 << crush
->get_item_name(qi
.id
);
2950 ss
<< "osd." << qi
.id
;
2953 *tbl
<< TextTable::endrow
;
2958 class CrushTreeFormattingDumper
: public CrushTreeDumper::FormattingDumper
{
2960 typedef CrushTreeDumper::FormattingDumper Parent
;
2962 explicit CrushTreeFormattingDumper(
2963 const CrushWrapper
*crush
,
2964 const CrushTreeDumper::name_map_t
& wsnames
)
2965 : Parent(crush
, wsnames
) {}
2967 explicit CrushTreeFormattingDumper(
2968 const CrushWrapper
*crush
,
2969 const CrushTreeDumper::name_map_t
& wsnames
,
2971 : Parent(crush
, wsnames
, show_shadow
) {}
2973 void dump(Formatter
*f
) {
2974 f
->open_array_section("nodes");
2977 f
->open_array_section("stray");
2983 void CrushWrapper::dump_tree(
2986 const CrushTreeDumper::name_map_t
& weight_set_names
,
2987 bool show_shadow
) const
2991 CrushTreePlainDumper(this, weight_set_names
, show_shadow
).dump(&tbl
);
2995 CrushTreeFormattingDumper(this, weight_set_names
, show_shadow
).dump(f
);
2999 void CrushWrapper::generate_test_instances(list
<CrushWrapper
*>& o
)
3001 o
.push_back(new CrushWrapper
);
3006 * Determine the default CRUSH ruleset ID to be used with
3007 * newly created replicated pools.
3009 * @returns a ruleset ID (>=0) or -1 if no suitable ruleset found
3011 int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext
*cct
)
3013 int crush_ruleset
= cct
->_conf
->osd_pool_default_crush_rule
;
3014 if (crush_ruleset
< 0) {
3015 crush_ruleset
= find_first_ruleset(pg_pool_t::TYPE_REPLICATED
);
3016 } else if (!ruleset_exists(crush_ruleset
)) {
3017 crush_ruleset
= -1; // match find_first_ruleset() retval
3019 return crush_ruleset
;
3022 bool CrushWrapper::is_valid_crush_name(const string
& s
)
3026 for (string::const_iterator p
= s
.begin(); p
!= s
.end(); ++p
) {
3030 !(*p
>= '0' && *p
<= '9') &&
3031 !(*p
>= 'A' && *p
<= 'Z') &&
3032 !(*p
>= 'a' && *p
<= 'z'))
3038 bool CrushWrapper::is_valid_crush_loc(CephContext
*cct
,
3039 const map
<string
,string
>& loc
)
3041 for (map
<string
,string
>::const_iterator l
= loc
.begin(); l
!= loc
.end(); ++l
) {
3042 if (!is_valid_crush_name(l
->first
) ||
3043 !is_valid_crush_name(l
->second
)) {
3044 ldout(cct
, 1) << "loc["
3045 << l
->first
<< "] = '"
3046 << l
->second
<< "' not a valid crush name ([A-Za-z0-9_-.]+)"
3054 int CrushWrapper::_choose_type_stack(
3056 const vector
<pair
<int,int>>& stack
,
3057 const set
<int>& overfull
,
3058 const vector
<int>& underfull
,
3059 const vector
<int>& orig
,
3060 vector
<int>::const_iterator
& i
,
3062 vector
<int> *pw
) const
3064 vector
<int> w
= *pw
;
3067 ldout(cct
, 10) << __func__
<< " stack " << stack
3073 vector
<int> cumulative_fanout(stack
.size());
3075 for (int j
= (int)stack
.size() - 1; j
>= 0; --j
) {
3076 cumulative_fanout
[j
] = f
;
3077 f
*= stack
[j
].second
;
3079 ldout(cct
, 10) << __func__
<< " cumulative_fanout " << cumulative_fanout
3082 // identify underful targets for each intermediate level.
3083 // this serves two purposes:
3084 // 1. we can tell when we are selecting a bucket that does not have any underfull
3085 // devices beneath it. that means that if the current input includes an overfull
3086 // device, we won't be able to find an underfull device with this parent to
3088 // 2. when we decide we should reject a bucket due to the above, this list gives us
3089 // a list of peers to consider that *do* have underfull devices available.. (we
3090 // are careful to pick one that has the same parent.)
3091 vector
<set
<int>> underfull_buckets
; // level -> set of buckets with >0 underfull item(s)
3092 underfull_buckets
.resize(stack
.size() - 1);
3093 for (auto osd
: underfull
) {
3095 for (int j
= (int)stack
.size() - 2; j
>= 0; --j
) {
3096 int type
= stack
[j
].first
;
3097 item
= get_parent_of_type(item
, type
);
3098 ldout(cct
, 10) << __func__
<< " underfull " << osd
<< " type " << type
3099 << " is " << item
<< dendl
;
3100 underfull_buckets
[j
].insert(item
);
3103 ldout(cct
, 20) << __func__
<< " underfull_buckets " << underfull_buckets
<< dendl
;
3105 for (unsigned j
= 0; j
< stack
.size(); ++j
) {
3106 int type
= stack
[j
].first
;
3107 int fanout
= stack
[j
].second
;
3108 int cum_fanout
= cumulative_fanout
[j
];
3109 ldout(cct
, 10) << " level " << j
<< ": type " << type
<< " fanout " << fanout
3110 << " cumulative " << cum_fanout
3111 << " w " << w
<< dendl
;
3114 for (auto from
: w
) {
3115 ldout(cct
, 10) << " from " << from
<< dendl
;
3116 // identify leaves under each choice. we use this to check whether any of these
3117 // leaves are overfull. (if so, we need to make sure there are underfull candidates
3118 // to swap for them.)
3119 vector
<set
<int>> leaves
;
3120 leaves
.resize(fanout
);
3121 for (int pos
= 0; pos
< fanout
; ++pos
) {
3124 int item
= get_parent_of_type(*tmpi
, type
);
3127 while (n
-- && tmpi
!= orig
.end()) {
3128 leaves
[pos
].insert(*tmpi
++);
3130 ldout(cct
, 10) << __func__
<< " from " << *tmpi
<< " got " << item
3131 << " of type " << type
<< " over leaves " << leaves
[pos
] << dendl
;
3134 bool replaced
= false;
3135 if (overfull
.count(*i
)) {
3136 for (auto item
: underfull
) {
3137 ldout(cct
, 10) << __func__
<< " pos " << pos
3138 << " was " << *i
<< " considering " << item
3140 if (used
.count(item
)) {
3141 ldout(cct
, 20) << __func__
<< " in used " << used
<< dendl
;
3144 if (!subtree_contains(from
, item
)) {
3145 ldout(cct
, 20) << __func__
<< " not in subtree " << from
<< dendl
;
3148 if (std::find(orig
.begin(), orig
.end(), item
) != orig
.end()) {
3149 ldout(cct
, 20) << __func__
<< " in orig " << orig
<< dendl
;
3154 ldout(cct
, 10) << __func__
<< " pos " << pos
<< " replace "
3155 << *i
<< " -> " << item
<< dendl
;
3162 ldout(cct
, 10) << __func__
<< " pos " << pos
<< " keep " << *i
3167 if (i
== orig
.end()) {
3168 ldout(cct
, 10) << __func__
<< " end of orig, break 1" << dendl
;
3173 if (j
+ 1 < stack
.size()) {
3174 // check if any buckets have overfull leaves but no underfull candidates
3175 for (int pos
= 0; pos
< fanout
; ++pos
) {
3176 if (underfull_buckets
[j
].count(o
[pos
]) == 0) {
3177 // are any leaves overfull?
3178 bool any_overfull
= false;
3179 for (auto osd
: leaves
[pos
]) {
3180 if (overfull
.count(osd
)) {
3181 any_overfull
= true;
3185 ldout(cct
, 10) << " bucket " << o
[pos
] << " has no underfull targets and "
3186 << ">0 leaves " << leaves
[pos
] << " is overfull; alts "
3187 << underfull_buckets
[j
]
3189 for (auto alt
: underfull_buckets
[j
]) {
3190 if (std::find(o
.begin(), o
.end(), alt
) == o
.end()) {
3191 // see if alt has the same parent
3193 get_parent_of_type(o
[pos
], stack
[j
-1].first
) ==
3194 get_parent_of_type(alt
, stack
[j
-1].first
)) {
3196 ldout(cct
, 10) << " replacing " << o
[pos
]
3197 << " (which has no underfull leaves) with " << alt
3199 << get_parent_of_type(alt
, stack
[j
-1].first
) << " type "
3200 << type
<< ")" << dendl
;
3202 ldout(cct
, 10) << " replacing " << o
[pos
]
3203 << " (which has no underfull leaves) with " << alt
3204 << " (first level)" << dendl
;
3208 ldout(cct
, 30) << " alt " << alt
<< " for " << o
[pos
]
3209 << " has different parent, skipping" << dendl
;
3217 if (i
== orig
.end()) {
3218 ldout(cct
, 10) << __func__
<< " end of orig, break 2" << dendl
;
3222 ldout(cct
, 10) << __func__
<< " w <- " << o
<< " was " << w
<< dendl
;
3229 int CrushWrapper::try_remap_rule(
3233 const set
<int>& overfull
,
3234 const vector
<int>& underfull
,
3235 const vector
<int>& orig
,
3236 vector
<int> *out
) const
3238 const crush_map
*map
= crush
;
3239 const crush_rule
*rule
= get_rule(ruleno
);
3242 ldout(cct
, 10) << __func__
<< " ruleno " << ruleno
3243 << " numrep " << maxout
<< " overfull " << overfull
3244 << " underfull " << underfull
<< " orig " << orig
3246 vector
<int> w
; // working set
3249 auto i
= orig
.begin();
3252 vector
<pair
<int,int>> type_stack
; // (type, fan-out)
3254 for (unsigned step
= 0; step
< rule
->len
; ++step
) {
3255 const crush_rule_step
*curstep
= &rule
->steps
[step
];
3256 ldout(cct
, 10) << __func__
<< " step " << step
<< " w " << w
<< dendl
;
3257 switch (curstep
->op
) {
3258 case CRUSH_RULE_TAKE
:
3259 if ((curstep
->arg1
>= 0 && curstep
->arg1
< map
->max_devices
) ||
3260 (-1-curstep
->arg1
>= 0 && -1-curstep
->arg1
< map
->max_buckets
&&
3261 map
->buckets
[-1-curstep
->arg1
])) {
3263 w
.push_back(curstep
->arg1
);
3264 ldout(cct
, 10) << __func__
<< " take " << w
<< dendl
;
3266 ldout(cct
, 1) << " bad take value " << curstep
->arg1
<< dendl
;
3270 case CRUSH_RULE_CHOOSELEAF_FIRSTN
:
3271 case CRUSH_RULE_CHOOSELEAF_INDEP
:
3273 int numrep
= curstep
->arg1
;
3274 int type
= curstep
->arg2
;
3277 type_stack
.push_back(make_pair(type
, numrep
));
3278 type_stack
.push_back(make_pair(0, 1));
3279 int r
= _choose_type_stack(cct
, type_stack
, overfull
, underfull
, orig
,
3287 case CRUSH_RULE_CHOOSE_FIRSTN
:
3288 case CRUSH_RULE_CHOOSE_INDEP
:
3290 int numrep
= curstep
->arg1
;
3291 int type
= curstep
->arg2
;
3294 type_stack
.push_back(make_pair(type
, numrep
));
3298 case CRUSH_RULE_EMIT
:
3299 ldout(cct
, 10) << " emit " << w
<< dendl
;
3300 if (!type_stack
.empty()) {
3301 int r
= _choose_type_stack(cct
, type_stack
, overfull
, underfull
, orig
,
3307 for (auto item
: w
) {
3308 out
->push_back(item
);
3323 int CrushWrapper::_choose_args_adjust_item_weight_in_bucket(
3325 crush_choose_arg_map cmap
,
3328 const vector
<int>& weight
,
3332 int bidx
= -1 - bucketid
;
3333 crush_bucket
*b
= crush
->buckets
[bidx
];
3334 if (bidx
>= (int)cmap
.size
) {
3336 *ss
<< "no weight-set for bucket " << b
->id
;
3337 ldout(cct
, 10) << __func__
<< " no crush_choose_arg for bucket " << b
->id
3341 crush_choose_arg
*carg
= &cmap
.args
[bidx
];
3342 if (carg
->weight_set
== NULL
) {
3344 *ss
<< "no weight-set for bucket " << b
->id
;
3345 ldout(cct
, 10) << __func__
<< " no weight_set for bucket " << b
->id
3349 if (carg
->weight_set_size
!= weight
.size()) {
3351 *ss
<< "weight_set_size != " << weight
.size() << " for bucket " << b
->id
;
3352 ldout(cct
, 10) << __func__
<< " weight_set_size != " << weight
.size()
3353 << " for bucket " << b
->id
<< dendl
;
3356 for (unsigned i
= 0; i
< b
->size
; i
++) {
3357 if (b
->items
[i
] == id
) {
3358 for (unsigned j
= 0; j
< weight
.size(); ++j
) {
3359 carg
->weight_set
[j
].weights
[i
] = weight
[j
];
3361 ldout(cct
, 5) << __func__
<< " set " << id
<< " to " << weight
3362 << " in bucket " << b
->id
<< dendl
;
3367 vector
<int> bucket_weight(weight
.size(), 0);
3368 for (unsigned i
= 0; i
< b
->size
; i
++) {
3369 for (unsigned j
= 0; j
< weight
.size(); ++j
) {
3370 bucket_weight
[j
] += carg
->weight_set
[j
].weights
[i
];
3373 choose_args_adjust_item_weight(cct
, cmap
, b
->id
, bucket_weight
, nullptr);
3378 int CrushWrapper::choose_args_adjust_item_weight(
3380 crush_choose_arg_map cmap
,
3382 const vector
<int>& weight
,
3385 ldout(cct
, 5) << __func__
<< " " << id
<< " weight " << weight
<< dendl
;
3387 for (int bidx
= 0; bidx
< crush
->max_buckets
; bidx
++) {
3388 crush_bucket
*b
= crush
->buckets
[bidx
];
3392 changed
+= _choose_args_adjust_item_weight_in_bucket(
3393 cct
, cmap
, b
->id
, id
, weight
, ss
);
3397 *ss
<< "item " << id
<< " not found in crush map";