]> git.proxmox.com Git - ceph.git/blame - ceph/src/crush/CrushWrapper.cc
bump version to 12.2.1-pve3
[ceph.git] / ceph / src / crush / CrushWrapper.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "osd/osd_types.h"
5#include "common/debug.h"
6#include "common/Formatter.h"
7#include "common/errno.h"
c07f9fc5 8#include "common/TextTable.h"
7c673cae
FG
9#include "include/stringify.h"
10
11#include "CrushWrapper.h"
12#include "CrushTreeDumper.h"
13
14#define dout_subsys ceph_subsys_crush
15
31f18b77
FG
16bool CrushWrapper::has_legacy_rulesets() const
17{
18 for (unsigned i=0; i<crush->max_rules; i++) {
19 crush_rule *r = crush->rules[i];
20 if (r &&
21 r->mask.ruleset != i) {
22 return true;
23 }
24 }
25 return false;
26}
27
28int CrushWrapper::renumber_rules_by_ruleset()
29{
30 int max_ruleset = 0;
31 for (unsigned i=0; i<crush->max_rules; i++) {
32 crush_rule *r = crush->rules[i];
33 if (r && r->mask.ruleset >= max_ruleset) {
34 max_ruleset = r->mask.ruleset + 1;
35 }
36 }
37 struct crush_rule **newrules =
38 (crush_rule**)calloc(1, max_ruleset * sizeof(crush_rule*));
39 for (unsigned i=0; i<crush->max_rules; i++) {
40 crush_rule *r = crush->rules[i];
41 if (!r)
42 continue;
43 if (newrules[r->mask.ruleset]) {
44 // collision, we can't do it.
45 free(newrules);
46 return -EINVAL;
47 }
48 newrules[r->mask.ruleset] = r;
49 }
50
51 // success, swap!
52 free(crush->rules);
53 crush->rules = newrules;
54 crush->max_rules = max_ruleset;
55 return 0;
56}
57
58bool CrushWrapper::has_multirule_rulesets() const
59{
60 for (unsigned i=0; i<crush->max_rules; i++) {
61 crush_rule *r = crush->rules[i];
62 if (!r)
63 continue;
64 for (unsigned j=i+1; j<crush->max_rules; j++) {
65 crush_rule *s = crush->rules[j];
66 if (!s)
67 continue;
68 if (r->mask.ruleset == s->mask.ruleset)
69 return true;
70 }
71 }
72 return false;
73}
74
c07f9fc5
FG
75bool CrushWrapper::has_non_straw2_buckets() const
76{
77 for (int i=0; i<crush->max_buckets; ++i) {
78 crush_bucket *b = crush->buckets[i];
79 if (!b)
80 continue;
81 if (b->alg != CRUSH_BUCKET_STRAW2)
82 return true;
83 }
84 return false;
85}
86
7c673cae
FG
87bool CrushWrapper::has_v2_rules() const
88{
89 for (unsigned i=0; i<crush->max_rules; i++) {
90 if (is_v2_rule(i)) {
91 return true;
92 }
93 }
94 return false;
95}
96
97bool CrushWrapper::is_v2_rule(unsigned ruleid) const
98{
99 // check rule for use of indep or new SET_* rule steps
100 if (ruleid >= crush->max_rules)
101 return false;
102 crush_rule *r = crush->rules[ruleid];
103 if (!r)
104 return false;
105 for (unsigned j=0; j<r->len; j++) {
106 if (r->steps[j].op == CRUSH_RULE_CHOOSE_INDEP ||
107 r->steps[j].op == CRUSH_RULE_CHOOSELEAF_INDEP ||
108 r->steps[j].op == CRUSH_RULE_SET_CHOOSE_TRIES ||
109 r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_TRIES) {
110 return true;
111 }
112 }
113 return false;
114}
115
116bool CrushWrapper::has_v3_rules() const
117{
118 for (unsigned i=0; i<crush->max_rules; i++) {
119 if (is_v3_rule(i)) {
120 return true;
121 }
122 }
123 return false;
124}
125
126bool CrushWrapper::is_v3_rule(unsigned ruleid) const
127{
128 // check rule for use of SET_CHOOSELEAF_VARY_R step
129 if (ruleid >= crush->max_rules)
130 return false;
131 crush_rule *r = crush->rules[ruleid];
132 if (!r)
133 return false;
134 for (unsigned j=0; j<r->len; j++) {
135 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_VARY_R) {
136 return true;
137 }
138 }
139 return false;
140}
141
142bool CrushWrapper::has_v4_buckets() const
143{
144 for (int i=0; i<crush->max_buckets; ++i) {
145 crush_bucket *b = crush->buckets[i];
146 if (!b)
147 continue;
148 if (b->alg == CRUSH_BUCKET_STRAW2)
149 return true;
150 }
151 return false;
152}
153
154bool CrushWrapper::has_v5_rules() const
155{
156 for (unsigned i=0; i<crush->max_rules; i++) {
157 if (is_v5_rule(i)) {
158 return true;
159 }
160 }
161 return false;
162}
163
164bool CrushWrapper::is_v5_rule(unsigned ruleid) const
165{
166 // check rule for use of SET_CHOOSELEAF_STABLE step
167 if (ruleid >= crush->max_rules)
168 return false;
169 crush_rule *r = crush->rules[ruleid];
170 if (!r)
171 return false;
172 for (unsigned j=0; j<r->len; j++) {
173 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_STABLE) {
174 return true;
175 }
176 }
177 return false;
178}
179
31f18b77 180bool CrushWrapper::has_choose_args() const
7c673cae
FG
181{
182 return !choose_args.empty();
183}
184
31f18b77 185bool CrushWrapper::has_incompat_choose_args() const
7c673cae 186{
31f18b77
FG
187 if (choose_args.empty())
188 return false;
189 if (choose_args.size() > 1)
190 return true;
c07f9fc5
FG
191 if (choose_args.begin()->first != DEFAULT_CHOOSE_ARGS)
192 return true;
31f18b77
FG
193 crush_choose_arg_map arg_map = choose_args.begin()->second;
194 for (__u32 i = 0; i < arg_map.size; i++) {
195 crush_choose_arg *arg = &arg_map.args[i];
196 if (arg->weight_set_size == 0 &&
197 arg->ids_size == 0)
198 continue;
199 if (arg->weight_set_size != 1)
200 return true;
201 if (arg->ids_size != 0)
202 return true;
203 }
204 return false;
7c673cae
FG
205}
206
207int CrushWrapper::split_id_class(int i, int *idout, int *classout) const
208{
209 if (!item_exists(i))
210 return -EINVAL;
211 string name = get_item_name(i);
212 size_t pos = name.find("~");
213 if (pos == string::npos) {
214 *idout = i;
215 *classout = -1;
216 return 0;
217 }
218 string name_no_class = name.substr(0, pos);
219 if (!name_exists(name_no_class))
220 return -ENOENT;
221 string class_name = name.substr(pos + 1);
222 if (!class_exists(class_name))
223 return -ENOENT;
224 *idout = get_item_id(name_no_class);
225 *classout = get_class_id(class_name);
226 return 0;
227}
228
229int CrushWrapper::can_rename_item(const string& srcname,
230 const string& dstname,
231 ostream *ss) const
232{
233 if (name_exists(srcname)) {
234 if (name_exists(dstname)) {
235 *ss << "dstname = '" << dstname << "' already exists";
236 return -EEXIST;
237 }
238 if (is_valid_crush_name(dstname)) {
239 return 0;
240 } else {
241 *ss << "dstname = '" << dstname << "' does not match [-_.0-9a-zA-Z]+";
242 return -EINVAL;
243 }
244 } else {
245 if (name_exists(dstname)) {
246 *ss << "srcname = '" << srcname << "' does not exist "
247 << "and dstname = '" << dstname << "' already exists";
248 return -EALREADY;
249 } else {
250 *ss << "srcname = '" << srcname << "' does not exist";
251 return -ENOENT;
252 }
253 }
254}
255
256int CrushWrapper::rename_item(const string& srcname,
257 const string& dstname,
258 ostream *ss)
259{
260 int ret = can_rename_item(srcname, dstname, ss);
261 if (ret < 0)
262 return ret;
263 int oldid = get_item_id(srcname);
264 return set_item_name(oldid, dstname);
265}
266
267int CrushWrapper::can_rename_bucket(const string& srcname,
268 const string& dstname,
269 ostream *ss) const
270{
271 int ret = can_rename_item(srcname, dstname, ss);
272 if (ret)
273 return ret;
274 int srcid = get_item_id(srcname);
275 if (srcid >= 0) {
276 *ss << "srcname = '" << srcname << "' is not a bucket "
277 << "because its id = " << srcid << " is >= 0";
278 return -ENOTDIR;
279 }
280 return 0;
281}
282
283int CrushWrapper::rename_bucket(const string& srcname,
284 const string& dstname,
285 ostream *ss)
286{
287 int ret = can_rename_bucket(srcname, dstname, ss);
288 if (ret < 0)
289 return ret;
290 int oldid = get_item_id(srcname);
291 return set_item_name(oldid, dstname);
292}
293
b5b8bbf5
FG
294int CrushWrapper::rename_rule(const string& srcname,
295 const string& dstname,
296 ostream *ss)
297{
298 if (!rule_exists(srcname)) {
299 if (ss) {
300 *ss << "source rule name '" << srcname << "' does not exist";
301 }
302 return -ENOENT;
303 }
304 if (rule_exists(dstname)) {
305 if (ss) {
306 *ss << "destination rule name '" << dstname << "' already exists";
307 }
308 return -EEXIST;
309 }
310 int rule_id = get_rule_id(srcname);
311 auto it = rule_name_map.find(rule_id);
312 assert(it != rule_name_map.end());
313 it->second = dstname;
314 if (have_rmaps) {
315 rule_name_rmap.erase(srcname);
316 rule_name_rmap[dstname] = rule_id;
317 }
318 return 0;
319}
320
7c673cae
FG
321void CrushWrapper::find_takes(set<int>& roots) const
322{
323 for (unsigned i=0; i<crush->max_rules; i++) {
324 crush_rule *r = crush->rules[i];
325 if (!r)
326 continue;
327 for (unsigned j=0; j<r->len; j++) {
328 if (r->steps[j].op == CRUSH_RULE_TAKE)
329 roots.insert(r->steps[j].arg1);
330 }
331 }
332}
333
334void CrushWrapper::find_roots(set<int>& roots) const
335{
336 for (int i = 0; i < crush->max_buckets; i++) {
337 if (!crush->buckets[i])
338 continue;
339 crush_bucket *b = crush->buckets[i];
340 if (!_search_item_exists(b->id))
341 roots.insert(b->id);
342 }
343}
344
345bool CrushWrapper::subtree_contains(int root, int item) const
346{
347 if (root == item)
348 return true;
349
350 if (root >= 0)
351 return false; // root is a leaf
352
353 const crush_bucket *b = get_bucket(root);
354 if (IS_ERR(b))
355 return false;
356
357 for (unsigned j=0; j<b->size; j++) {
358 if (subtree_contains(b->items[j], item))
359 return true;
360 }
361 return false;
362}
363
364bool CrushWrapper::_maybe_remove_last_instance(CephContext *cct, int item, bool unlink_only)
365{
366 // last instance?
367 if (_search_item_exists(item)) {
368 return false;
369 }
370 if (item < 0 && _bucket_is_in_use(item)) {
371 return false;
372 }
373
374 if (item < 0 && !unlink_only) {
375 crush_bucket *t = get_bucket(item);
376 ldout(cct, 5) << "_maybe_remove_last_instance removing bucket " << item << dendl;
377 crush_remove_bucket(crush, t);
378 if (class_bucket.count(item) != 0)
379 class_bucket.erase(item);
224ce89b 380 class_remove_item(item);
7c673cae
FG
381 }
382 if ((item >= 0 || !unlink_only) && name_map.count(item)) {
383 ldout(cct, 5) << "_maybe_remove_last_instance removing name for item " << item << dendl;
384 name_map.erase(item);
385 have_rmaps = false;
c07f9fc5
FG
386 if (item >= 0 && !unlink_only) {
387 class_remove_item(item);
388 }
7c673cae 389 }
d2e6a577 390 rebuild_roots_with_classes();
7c673cae
FG
391 return true;
392}
393
35e4c445 394int CrushWrapper::remove_root(int item)
7c673cae 395{
7c673cae 396 crush_bucket *b = get_bucket(item);
d2e6a577
FG
397 if (IS_ERR(b)) {
398 // should be idempotent
399 // e.g.: we use 'crush link' to link same host into
400 // different roots, which as a result can cause different
401 // shadow trees reference same hosts too. This means
402 // we may need to destory the same buckets(hosts, racks, etc.)
403 // multiple times during rebuilding all shadow trees.
404 return 0;
405 }
7c673cae
FG
406
407 for (unsigned n = 0; n < b->size; n++) {
408 if (b->items[n] >= 0)
409 continue;
35e4c445 410 int r = remove_root(b->items[n]);
7c673cae
FG
411 if (r < 0)
412 return r;
413 }
414
415 crush_remove_bucket(crush, b);
416 if (name_map.count(item) != 0) {
417 name_map.erase(item);
418 have_rmaps = false;
419 }
420 if (class_bucket.count(item) != 0)
421 class_bucket.erase(item);
c07f9fc5 422 class_remove_item(item);
7c673cae
FG
423 return 0;
424}
425
426int CrushWrapper::remove_item(CephContext *cct, int item, bool unlink_only)
427{
c07f9fc5
FG
428 ldout(cct, 5) << "remove_item " << item
429 << (unlink_only ? " unlink_only":"") << dendl;
7c673cae
FG
430
431 int ret = -ENOENT;
432
433 if (item < 0 && !unlink_only) {
434 crush_bucket *t = get_bucket(item);
435 if (IS_ERR(t)) {
c07f9fc5
FG
436 ldout(cct, 1) << "remove_item bucket " << item << " does not exist"
437 << dendl;
7c673cae
FG
438 return -ENOENT;
439 }
440
441 if (t->size) {
442 ldout(cct, 1) << "remove_item bucket " << item << " has " << t->size
443 << " items, not empty" << dendl;
444 return -ENOTEMPTY;
445 }
446 if (_bucket_is_in_use(item)) {
447 return -EBUSY;
448 }
449 }
450
451 for (int i = 0; i < crush->max_buckets; i++) {
452 if (!crush->buckets[i])
453 continue;
454 crush_bucket *b = crush->buckets[i];
455
456 for (unsigned i=0; i<b->size; ++i) {
457 int id = b->items[i];
458 if (id == item) {
459 ldout(cct, 5) << "remove_item removing item " << item
460 << " from bucket " << b->id << dendl;
c07f9fc5
FG
461 for (auto& p : choose_args) {
462 // weight down each weight-set to 0 before we remove the item
463 vector<int> weightv(get_choose_args_positions(p.second), 0);
464 choose_args_adjust_item_weight(cct, p.second, item, weightv, nullptr);
465 }
31f18b77 466 bucket_remove_item(b, item);
7c673cae
FG
467 adjust_item_weight(cct, b->id, b->weight);
468 ret = 0;
469 }
470 }
471 }
472
473 if (_maybe_remove_last_instance(cct, item, unlink_only))
474 ret = 0;
475
476 return ret;
477}
478
479bool CrushWrapper::_search_item_exists(int item) const
480{
481 for (int i = 0; i < crush->max_buckets; i++) {
482 if (!crush->buckets[i])
483 continue;
484 crush_bucket *b = crush->buckets[i];
485 for (unsigned j=0; j<b->size; ++j) {
486 if (b->items[j] == item)
487 return true;
488 }
489 }
490 return false;
491}
492
493bool CrushWrapper::_bucket_is_in_use(int item)
494{
495 for (auto &i : class_bucket)
496 for (auto &j : i.second)
497 if (j.second == item)
498 return true;
499 for (unsigned i = 0; i < crush->max_rules; ++i) {
500 crush_rule *r = crush->rules[i];
501 if (!r)
502 continue;
503 for (unsigned j = 0; j < r->len; ++j) {
504 if (r->steps[j].op == CRUSH_RULE_TAKE) {
505 int step_item = r->steps[j].arg1;
506 int original_item;
507 int c;
508 int res = split_id_class(step_item, &original_item, &c);
509 if (res < 0)
510 return false;
511 if (step_item == item || original_item == item)
512 return true;
513 }
514 }
515 }
516 return false;
517}
518
c07f9fc5
FG
519int CrushWrapper::_remove_item_under(
520 CephContext *cct, int item, int ancestor, bool unlink_only)
7c673cae
FG
521{
522 ldout(cct, 5) << "_remove_item_under " << item << " under " << ancestor
523 << (unlink_only ? " unlink_only":"") << dendl;
524
525 if (ancestor >= 0) {
526 return -EINVAL;
527 }
528
529 if (!bucket_exists(ancestor))
530 return -EINVAL;
531
532 int ret = -ENOENT;
533
534 crush_bucket *b = get_bucket(ancestor);
535 for (unsigned i=0; i<b->size; ++i) {
536 int id = b->items[i];
537 if (id == item) {
c07f9fc5
FG
538 ldout(cct, 5) << "_remove_item_under removing item " << item
539 << " from bucket " << b->id << dendl;
c07f9fc5
FG
540 for (auto& p : choose_args) {
541 // weight down each weight-set to 0 before we remove the item
542 vector<int> weightv(get_choose_args_positions(p.second), 0);
543 _choose_args_adjust_item_weight_in_bucket(
544 cct, p.second, b->id, item, weightv, nullptr);
545 }
35e4c445 546 bucket_remove_item(b, item);
7c673cae
FG
547 adjust_item_weight(cct, b->id, b->weight);
548 ret = 0;
549 } else if (id < 0) {
550 int r = remove_item_under(cct, item, id, unlink_only);
551 if (r == 0)
552 ret = 0;
553 }
554 }
555 return ret;
556}
557
c07f9fc5
FG
558int CrushWrapper::remove_item_under(
559 CephContext *cct, int item, int ancestor, bool unlink_only)
7c673cae
FG
560{
561 ldout(cct, 5) << "remove_item_under " << item << " under " << ancestor
562 << (unlink_only ? " unlink_only":"") << dendl;
563
564 if (!unlink_only && _bucket_is_in_use(item)) {
565 return -EBUSY;
566 }
567
568 int ret = _remove_item_under(cct, item, ancestor, unlink_only);
569 if (ret < 0)
570 return ret;
571
572 if (item < 0 && !unlink_only) {
573 crush_bucket *t = get_bucket(item);
574 if (IS_ERR(t)) {
575 ldout(cct, 1) << "remove_item_under bucket " << item
576 << " does not exist" << dendl;
577 return -ENOENT;
578 }
579
580 if (t->size) {
581 ldout(cct, 1) << "remove_item_under bucket " << item << " has " << t->size
582 << " items, not empty" << dendl;
583 return -ENOTEMPTY;
584 }
585 }
586
587 if (_maybe_remove_last_instance(cct, item, unlink_only))
588 ret = 0;
589
590 return ret;
591}
592
593int CrushWrapper::get_common_ancestor_distance(CephContext *cct, int id,
594 const std::multimap<string,string>& loc)
595{
596 ldout(cct, 5) << __func__ << " " << id << " " << loc << dendl;
597 if (!item_exists(id))
598 return -ENOENT;
599 map<string,string> id_loc = get_full_location(id);
600 ldout(cct, 20) << " id is at " << id_loc << dendl;
601
602 for (map<int,string>::const_iterator p = type_map.begin();
603 p != type_map.end();
604 ++p) {
605 map<string,string>::iterator ip = id_loc.find(p->second);
606 if (ip == id_loc.end())
607 continue;
608 for (std::multimap<string,string>::const_iterator q = loc.find(p->second);
609 q != loc.end();
610 ++q) {
611 if (q->first != p->second)
612 break;
613 if (q->second == ip->second)
614 return p->first;
615 }
616 }
617 return -ERANGE;
618}
619
620int CrushWrapper::parse_loc_map(const std::vector<string>& args,
621 std::map<string,string> *ploc)
622{
623 ploc->clear();
624 for (unsigned i = 0; i < args.size(); ++i) {
625 const char *s = args[i].c_str();
626 const char *pos = strchr(s, '=');
627 if (!pos)
628 return -EINVAL;
629 string key(s, 0, pos-s);
630 string value(pos+1);
631 if (value.length())
632 (*ploc)[key] = value;
633 else
634 return -EINVAL;
635 }
636 return 0;
637}
638
639int CrushWrapper::parse_loc_multimap(const std::vector<string>& args,
640 std::multimap<string,string> *ploc)
641{
642 ploc->clear();
643 for (unsigned i = 0; i < args.size(); ++i) {
644 const char *s = args[i].c_str();
645 const char *pos = strchr(s, '=');
646 if (!pos)
647 return -EINVAL;
648 string key(s, 0, pos-s);
649 string value(pos+1);
650 if (value.length())
651 ploc->insert(make_pair(key, value));
652 else
653 return -EINVAL;
654 }
655 return 0;
656}
657
658bool CrushWrapper::check_item_loc(CephContext *cct, int item, const map<string,string>& loc,
659 int *weight)
660{
661 ldout(cct, 5) << "check_item_loc item " << item << " loc " << loc << dendl;
662
663 for (map<int,string>::const_iterator p = type_map.begin(); p != type_map.end(); ++p) {
664 // ignore device
665 if (p->first == 0)
666 continue;
667
668 // ignore types that aren't specified in loc
669 map<string,string>::const_iterator q = loc.find(p->second);
670 if (q == loc.end()) {
671 ldout(cct, 2) << "warning: did not specify location for '" << p->second << "' level (levels are "
672 << type_map << ")" << dendl;
673 continue;
674 }
675
676 if (!name_exists(q->second)) {
677 ldout(cct, 5) << "check_item_loc bucket " << q->second << " dne" << dendl;
678 return false;
679 }
680
681 int id = get_item_id(q->second);
682 if (id >= 0) {
683 ldout(cct, 5) << "check_item_loc requested " << q->second << " for type " << p->second
684 << " is a device, not bucket" << dendl;
685 return false;
686 }
687
688 assert(bucket_exists(id));
689 crush_bucket *b = get_bucket(id);
690
691 // see if item exists in this bucket
692 for (unsigned j=0; j<b->size; j++) {
693 if (b->items[j] == item) {
694 ldout(cct, 2) << "check_item_loc " << item << " exists in bucket " << b->id << dendl;
695 if (weight)
696 *weight = crush_get_bucket_item_weight(b, j);
697 return true;
698 }
699 }
700 return false;
701 }
702
703 ldout(cct, 1) << "check_item_loc item " << item << " loc " << loc << dendl;
704 return false;
705}
706
707map<string, string> CrushWrapper::get_full_location(int id)
708{
709 vector<pair<string, string> > full_location_ordered;
710 map<string,string> full_location;
711
712 get_full_location_ordered(id, full_location_ordered);
713
714 std::copy(full_location_ordered.begin(),
715 full_location_ordered.end(),
716 std::inserter(full_location, full_location.begin()));
717
718 return full_location;
719}
720
721int CrushWrapper::get_full_location_ordered(int id, vector<pair<string, string> >& path)
722{
723 if (!item_exists(id))
724 return -ENOENT;
725 int cur = id;
726 int ret;
727 while (true) {
728 pair<string, string> parent_coord = get_immediate_parent(cur, &ret);
729 if (ret != 0)
730 break;
731 path.push_back(parent_coord);
732 cur = get_item_id(parent_coord.second);
733 }
734 return 0;
735}
736
31f18b77
FG
737string CrushWrapper::get_full_location_ordered_string(int id)
738{
739 vector<pair<string, string> > full_location_ordered;
740 string full_location;
741 get_full_location_ordered(id, full_location_ordered);
742 reverse(begin(full_location_ordered), end(full_location_ordered));
743 for(auto i = full_location_ordered.begin(); i != full_location_ordered.end(); i++) {
744 full_location = full_location + i->first + "=" + i->second;
745 if (i != full_location_ordered.end() - 1) {
746 full_location = full_location + ",";
747 }
748 }
749 return full_location;
750}
7c673cae
FG
751
752map<int, string> CrushWrapper::get_parent_hierarchy(int id)
753{
754 map<int,string> parent_hierarchy;
755 pair<string, string> parent_coord = get_immediate_parent(id);
756 int parent_id;
757
758 // get the integer type for id and create a counter from there
759 int type_counter = get_bucket_type(id);
760
761 // if we get a negative type then we can assume that we have an OSD
762 // change behavior in get_item_type FIXME
763 if (type_counter < 0)
764 type_counter = 0;
765
766 // read the type map and get the name of the type with the largest ID
767 int high_type = 0;
768 for (map<int, string>::iterator it = type_map.begin(); it != type_map.end(); ++it){
769 if ( (*it).first > high_type )
770 high_type = (*it).first;
771 }
772
773 parent_id = get_item_id(parent_coord.second);
774
775 while (type_counter < high_type) {
776 type_counter++;
777 parent_hierarchy[ type_counter ] = parent_coord.first;
778
779 if (type_counter < high_type){
780 // get the coordinate information for the next parent
781 parent_coord = get_immediate_parent(parent_id);
782 parent_id = get_item_id(parent_coord.second);
783 }
784 }
785
786 return parent_hierarchy;
787}
788
789int CrushWrapper::get_children(int id, list<int> *children)
790{
791 // leaf?
792 if (id >= 0) {
793 return 0;
794 }
795
796 crush_bucket *b = get_bucket(id);
797 if (IS_ERR(b)) {
798 return -ENOENT;
799 }
800
801 for (unsigned n=0; n<b->size; n++) {
802 children->push_back(b->items[n]);
803 }
804 return b->size;
805}
806
31f18b77
FG
807int CrushWrapper::_get_leaves(int id, list<int> *leaves)
808{
809 assert(leaves);
7c673cae 810
31f18b77
FG
811 // Already leaf?
812 if (id >= 0) {
813 leaves->push_back(id);
814 return 0;
815 }
816
817 crush_bucket *b = get_bucket(id);
818 if (IS_ERR(b)) {
819 return -ENOENT;
820 }
821
822 for (unsigned n = 0; n < b->size; n++) {
823 if (b->items[n] >= 0) {
824 leaves->push_back(b->items[n]);
825 } else {
826 // is a bucket, do recursive call
827 int r = _get_leaves(b->items[n], leaves);
828 if (r < 0) {
829 return r;
830 }
831 }
832 }
833
834 return 0; // all is well
835}
836
837int CrushWrapper::get_leaves(const string &name, set<int> *leaves)
7c673cae 838{
31f18b77
FG
839 assert(leaves);
840 leaves->clear();
841
842 if (!name_exists(name)) {
843 return -ENOENT;
844 }
845
846 int id = get_item_id(name);
847 if (id >= 0) {
848 // already leaf
849 leaves->insert(id);
850 return 0;
7c673cae
FG
851 }
852
31f18b77
FG
853 list<int> unordered;
854 int r = _get_leaves(id, &unordered);
855 if (r < 0) {
856 return r;
857 }
858
859 for (auto &p : unordered) {
860 leaves->insert(p);
861 }
862
863 return 0;
864}
865
c07f9fc5
FG
866int CrushWrapper::insert_item(
867 CephContext *cct, int item, float weight, string name,
868 const map<string,string>& loc) // typename -> bucketname
31f18b77 869{
7c673cae
FG
870 ldout(cct, 5) << "insert_item item " << item << " weight " << weight
871 << " name " << name << " loc " << loc << dendl;
872
873 if (!is_valid_crush_name(name))
874 return -EINVAL;
875
876 if (!is_valid_crush_loc(cct, loc))
877 return -EINVAL;
878
224ce89b
WB
879 int r = validate_weightf(weight);
880 if (r < 0) {
881 return r;
882 }
883
7c673cae
FG
884 if (name_exists(name)) {
885 if (get_item_id(name) != item) {
886 ldout(cct, 10) << "device name '" << name << "' already exists as id "
887 << get_item_id(name) << dendl;
888 return -EEXIST;
889 }
890 } else {
891 set_item_name(item, name);
892 }
893
894 int cur = item;
895
c07f9fc5
FG
896 // create locations if locations don't exist and add child in
897 // location with 0 weight the more detail in the insert_item method
898 // declaration in CrushWrapper.h
899 for (auto p = type_map.begin(); p != type_map.end(); ++p) {
7c673cae
FG
900 // ignore device type
901 if (p->first == 0)
902 continue;
903
904 // skip types that are unspecified
905 map<string,string>::const_iterator q = loc.find(p->second);
906 if (q == loc.end()) {
c07f9fc5
FG
907 ldout(cct, 2) << "warning: did not specify location for '"
908 << p->second << "' level (levels are "
7c673cae
FG
909 << type_map << ")" << dendl;
910 continue;
911 }
912
913 if (!name_exists(q->second)) {
914 ldout(cct, 5) << "insert_item creating bucket " << q->second << dendl;
915 int empty = 0, newid;
916 int r = add_bucket(0, 0,
917 CRUSH_HASH_DEFAULT, p->first, 1, &cur, &empty, &newid);
918 if (r < 0) {
c07f9fc5
FG
919 ldout(cct, 1) << "add_bucket failure error: " << cpp_strerror(r)
920 << dendl;
7c673cae
FG
921 return r;
922 }
923 set_item_name(newid, q->second);
924
925 cur = newid;
926 continue;
927 }
928
929 // add to an existing bucket
930 int id = get_item_id(q->second);
931 if (!bucket_exists(id)) {
932 ldout(cct, 1) << "insert_item doesn't have bucket " << id << dendl;
933 return -EINVAL;
934 }
935
936 // check that we aren't creating a cycle.
937 if (subtree_contains(id, cur)) {
c07f9fc5
FG
938 ldout(cct, 1) << "insert_item item " << cur << " already exists beneath "
939 << id << dendl;
7c673cae
FG
940 return -EINVAL;
941 }
942
943 // we have done sanity check above
944 crush_bucket *b = get_bucket(id);
945
946 if (p->first != b->type) {
947 ldout(cct, 1) << "insert_item existing bucket has type "
948 << "'" << type_map[b->type] << "' != "
949 << "'" << type_map[p->first] << "'" << dendl;
950 return -EINVAL;
951 }
952
953 // are we forming a loop?
954 if (subtree_contains(cur, b->id)) {
955 ldout(cct, 1) << "insert_item " << cur << " already contains " << b->id
956 << "; cannot form loop" << dendl;
957 return -ELOOP;
958 }
959
960 ldout(cct, 5) << "insert_item adding " << cur << " weight " << weight
961 << " to bucket " << id << dendl;
31f18b77 962 int r = bucket_add_item(b, cur, 0);
7c673cae
FG
963 assert (!r);
964 break;
965 }
966
967 // adjust the item's weight in location
c07f9fc5 968 if (adjust_item_weightf_in_loc(cct, item, weight, loc) > 0) {
7c673cae
FG
969 if (item >= crush->max_devices) {
970 crush->max_devices = item + 1;
c07f9fc5
FG
971 ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices
972 << dendl;
973 }
974 r = rebuild_roots_with_classes();
975 if (r < 0) {
976 ldout(cct, 0) << __func__ << " unable to rebuild roots with classes: "
977 << cpp_strerror(r) << dendl;
978 return r;
7c673cae
FG
979 }
980 return 0;
981 }
982
c07f9fc5
FG
983 ldout(cct, 1) << "error: didn't find anywhere to add item " << item
984 << " in " << loc << dendl;
7c673cae
FG
985 return -EINVAL;
986}
987
c07f9fc5
FG
988
989int CrushWrapper::move_bucket(
990 CephContext *cct, int id, const map<string,string>& loc)
7c673cae 991{
7c673cae
FG
992 // sorry this only works for buckets
993 if (id >= 0)
994 return -EINVAL;
995
996 if (!item_exists(id))
997 return -ENOENT;
998
999 // get the name of the bucket we are trying to move for later
1000 string id_name = get_item_name(id);
1001
1002 // detach the bucket
1003 int bucket_weight = detach_bucket(cct, id);
1004
1005 // insert the bucket back into the hierarchy
1006 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
1007}
1008
c07f9fc5
FG
1009int CrushWrapper::detach_bucket(CephContext *cct, int item)
1010{
1011 if (!crush)
1012 return (-EINVAL);
1013
1014 if (item >= 0)
1015 return (-EINVAL);
1016
1017 // check that the bucket that we want to detach exists
1018 assert(bucket_exists(item));
1019
1020 // get the bucket's weight
1021 crush_bucket *b = get_bucket(item);
1022 unsigned bucket_weight = b->weight;
1023
1024 // get where the bucket is located
1025 pair<string, string> bucket_location = get_immediate_parent(item);
1026
1027 // get the id of the parent bucket
1028 int parent_id = get_item_id(bucket_location.second);
1029
1030 // get the parent bucket
1031 crush_bucket *parent_bucket = get_bucket(parent_id);
1032
1033 if (!IS_ERR(parent_bucket)) {
1034 // zero out the bucket weight
1035 bucket_adjust_item_weight(cct, parent_bucket, item, 0);
1036 adjust_item_weight(cct, parent_bucket->id, parent_bucket->weight);
1037 for (auto& p : choose_args) {
1038 // weight down each weight-set to 0 before we remove the item
1039 vector<int> weightv(get_choose_args_positions(p.second), 0);
1040 choose_args_adjust_item_weight(cct, p.second, item, weightv, nullptr);
1041 }
1042
1043 // remove the bucket from the parent
1044 bucket_remove_item(parent_bucket, item);
1045 } else if (PTR_ERR(parent_bucket) != -ENOENT) {
1046 return PTR_ERR(parent_bucket);
1047 }
1048
1049 // check that we're happy
1050 int test_weight = 0;
1051 map<string,string> test_location;
1052 test_location[ bucket_location.first ] = (bucket_location.second);
1053
1054 bool successful_detach = !(check_item_loc(cct, item, test_location,
1055 &test_weight));
1056 assert(successful_detach);
1057 assert(test_weight == 0);
1058
1059 return bucket_weight;
1060}
1061
31f18b77 1062int CrushWrapper::swap_bucket(CephContext *cct, int src, int dst)
7c673cae 1063{
31f18b77
FG
1064 if (src >= 0 || dst >= 0)
1065 return -EINVAL;
1066 if (!item_exists(src) || !item_exists(dst))
1067 return -EINVAL;
1068 crush_bucket *a = get_bucket(src);
1069 crush_bucket *b = get_bucket(dst);
1070 unsigned aw = a->weight;
1071 unsigned bw = b->weight;
1072
1073 // swap weights
1074 adjust_item_weight(cct, a->id, bw);
1075 adjust_item_weight(cct, b->id, aw);
1076
1077 // swap items
1078 map<int,unsigned> tmp;
1079 unsigned as = a->size;
1080 unsigned bs = b->size;
1081 for (unsigned i = 0; i < as; ++i) {
1082 int item = a->items[0];
1083 int itemw = crush_get_bucket_item_weight(a, 0);
1084 tmp[item] = itemw;
1085 bucket_remove_item(a, item);
1086 }
1087 assert(a->size == 0);
1088 assert(b->size == bs);
1089 for (unsigned i = 0; i < bs; ++i) {
1090 int item = b->items[0];
1091 int itemw = crush_get_bucket_item_weight(b, 0);
1092 bucket_remove_item(b, item);
1093 bucket_add_item(a, item, itemw);
1094 }
1095 assert(a->size == bs);
1096 assert(b->size == 0);
1097 for (auto t : tmp) {
1098 bucket_add_item(b, t.first, t.second);
1099 }
1100 assert(a->size == bs);
1101 assert(b->size == as);
1102
1103 // swap names
1104 swap_names(src, dst);
b5b8bbf5 1105 return rebuild_roots_with_classes();
31f18b77 1106}
7c673cae 1107
c07f9fc5
FG
1108int CrushWrapper::link_bucket(
1109 CephContext *cct, int id, const map<string,string>& loc)
31f18b77 1110{
7c673cae
FG
1111 // sorry this only works for buckets
1112 if (id >= 0)
1113 return -EINVAL;
1114
1115 if (!item_exists(id))
1116 return -ENOENT;
1117
1118 // get the name of the bucket we are trying to move for later
1119 string id_name = get_item_name(id);
1120
1121 crush_bucket *b = get_bucket(id);
1122 unsigned bucket_weight = b->weight;
1123
1124 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
1125}
1126
c07f9fc5
FG
1127int CrushWrapper::create_or_move_item(
1128 CephContext *cct, int item, float weight, string name,
1129 const map<string,string>& loc) // typename -> bucketname
7c673cae 1130{
7c673cae
FG
1131 int ret = 0;
1132 int old_iweight;
1133
1134 if (!is_valid_crush_name(name))
1135 return -EINVAL;
1136
1137 if (check_item_loc(cct, item, loc, &old_iweight)) {
c07f9fc5
FG
1138 ldout(cct, 5) << "create_or_move_item " << item << " already at " << loc
1139 << dendl;
7c673cae
FG
1140 } else {
1141 if (_search_item_exists(item)) {
1142 weight = get_item_weightf(item);
c07f9fc5
FG
1143 ldout(cct, 10) << "create_or_move_item " << item
1144 << " exists with weight " << weight << dendl;
7c673cae
FG
1145 remove_item(cct, item, true);
1146 }
c07f9fc5
FG
1147 ldout(cct, 5) << "create_or_move_item adding " << item
1148 << " weight " << weight
7c673cae
FG
1149 << " at " << loc << dendl;
1150 ret = insert_item(cct, item, weight, name, loc);
1151 if (ret == 0)
1152 ret = 1; // changed
1153 }
1154 return ret;
1155}
1156
c07f9fc5
FG
1157int CrushWrapper::update_item(
1158 CephContext *cct, int item, float weight, string name,
1159 const map<string,string>& loc) // typename -> bucketname
7c673cae 1160{
7c673cae
FG
1161 ldout(cct, 5) << "update_item item " << item << " weight " << weight
1162 << " name " << name << " loc " << loc << dendl;
1163 int ret = 0;
1164
1165 if (!is_valid_crush_name(name))
1166 return -EINVAL;
1167
1168 if (!is_valid_crush_loc(cct, loc))
1169 return -EINVAL;
1170
224ce89b
WB
1171 ret = validate_weightf(weight);
1172 if (ret < 0) {
1173 return ret;
1174 }
1175
7c673cae
FG
1176 // compare quantized (fixed-point integer) weights!
1177 int iweight = (int)(weight * (float)0x10000);
1178 int old_iweight;
1179 if (check_item_loc(cct, item, loc, &old_iweight)) {
1180 ldout(cct, 5) << "update_item " << item << " already at " << loc << dendl;
1181 if (old_iweight != iweight) {
1182 ldout(cct, 5) << "update_item " << item << " adjusting weight "
c07f9fc5
FG
1183 << ((float)old_iweight/(float)0x10000) << " -> " << weight
1184 << dendl;
7c673cae
FG
1185 adjust_item_weight_in_loc(cct, item, iweight, loc);
1186 ret = 1;
1187 }
1188 if (get_item_name(item) != name) {
c07f9fc5
FG
1189 ldout(cct, 5) << "update_item setting " << item << " name to " << name
1190 << dendl;
7c673cae
FG
1191 set_item_name(item, name);
1192 ret = 1;
1193 }
1194 } else {
1195 if (item_exists(item)) {
1196 remove_item(cct, item, true);
1197 }
1198 ldout(cct, 5) << "update_item adding " << item << " weight " << weight
1199 << " at " << loc << dendl;
1200 ret = insert_item(cct, item, weight, name, loc);
1201 if (ret == 0)
1202 ret = 1; // changed
1203 }
1204 return ret;
1205}
1206
1207int CrushWrapper::get_item_weight(int id) const
1208{
1209 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1210 crush_bucket *b = crush->buckets[bidx];
1211 if (b == NULL)
1212 continue;
1213 if (b->id == id)
1214 return b->weight;
1215 for (unsigned i = 0; i < b->size; i++)
1216 if (b->items[i] == id)
1217 return crush_get_bucket_item_weight(b, i);
1218 }
1219 return -ENOENT;
1220}
1221
1222int CrushWrapper::get_item_weight_in_loc(int id, const map<string,string> &loc)
1223{
1224 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
1225
1226 int bid = get_item_id(l->second);
1227 if (!bucket_exists(bid))
1228 continue;
1229 crush_bucket *b = get_bucket(bid);
1230 for (unsigned int i = 0; i < b->size; i++) {
1231 if (b->items[i] == id) {
1232 return crush_get_bucket_item_weight(b, i);
1233 }
1234 }
1235 }
1236 return -ENOENT;
1237}
1238
1239int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight)
1240{
1241 ldout(cct, 5) << "adjust_item_weight " << id << " weight " << weight << dendl;
1242 int changed = 0;
1243 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1244 crush_bucket *b = crush->buckets[bidx];
1245 if (b == 0)
1246 continue;
1247 for (unsigned i = 0; i < b->size; i++) {
1248 if (b->items[i] == id) {
31f18b77 1249 int diff = bucket_adjust_item_weight(cct, b, id, weight);
c07f9fc5
FG
1250 ldout(cct, 5) << "adjust_item_weight " << id << " diff " << diff
1251 << " in bucket " << bidx << dendl;
7c673cae
FG
1252 adjust_item_weight(cct, -1 - bidx, b->weight);
1253 changed++;
1254 }
1255 }
1256 }
1257 if (!changed)
1258 return -ENOENT;
1259 return changed;
1260}
1261
1262int CrushWrapper::adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc)
1263{
c07f9fc5
FG
1264 ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight
1265 << " in " << loc << dendl;
7c673cae
FG
1266 int changed = 0;
1267
c07f9fc5 1268 for (auto l = loc.begin(); l != loc.end(); ++l) {
7c673cae
FG
1269 int bid = get_item_id(l->second);
1270 if (!bucket_exists(bid))
1271 continue;
1272 crush_bucket *b = get_bucket(bid);
1273 for (unsigned int i = 0; i < b->size; i++) {
1274 if (b->items[i] == id) {
31f18b77 1275 int diff = bucket_adjust_item_weight(cct, b, id, weight);
c07f9fc5
FG
1276 ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " diff " << diff
1277 << " in bucket " << bid << dendl;
7c673cae
FG
1278 adjust_item_weight(cct, bid, b->weight);
1279 changed++;
1280 }
1281 }
1282 }
1283 if (!changed)
1284 return -ENOENT;
1285 return changed;
1286}
1287
1288int CrushWrapper::adjust_subtree_weight(CephContext *cct, int id, int weight)
1289{
1290 ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl;
1291 crush_bucket *b = get_bucket(id);
1292 if (IS_ERR(b))
1293 return PTR_ERR(b);
1294 int changed = 0;
1295 list<crush_bucket*> q;
1296 q.push_back(b);
1297 while (!q.empty()) {
1298 b = q.front();
1299 q.pop_front();
1300 int local_changed = 0;
1301 for (unsigned i=0; i<b->size; ++i) {
1302 int n = b->items[i];
1303 if (n >= 0) {
31f18b77 1304 bucket_adjust_item_weight(cct, b, n, weight);
7c673cae
FG
1305 ++changed;
1306 ++local_changed;
1307 } else {
1308 crush_bucket *sub = get_bucket(n);
1309 if (IS_ERR(sub))
1310 continue;
1311 q.push_back(sub);
1312 }
1313 }
1314 if (local_changed) {
1315 adjust_item_weight(cct, b->id, b->weight);
1316 }
1317 }
1318 return changed;
1319}
1320
1321bool CrushWrapper::check_item_present(int id) const
1322{
1323 bool found = false;
1324
1325 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1326 crush_bucket *b = crush->buckets[bidx];
1327 if (b == 0)
1328 continue;
1329 for (unsigned i = 0; i < b->size; i++)
1330 if (b->items[i] == id)
1331 found = true;
1332 }
1333 return found;
1334}
1335
1336
1337pair<string,string> CrushWrapper::get_immediate_parent(int id, int *_ret)
1338{
1339
1340 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1341 crush_bucket *b = crush->buckets[bidx];
1342 if (b == 0)
1343 continue;
c07f9fc5 1344 if (is_shadow_item(b->id))
224ce89b 1345 continue;
7c673cae
FG
1346 for (unsigned i = 0; i < b->size; i++)
1347 if (b->items[i] == id) {
1348 string parent_id = name_map[b->id];
1349 string parent_bucket_type = type_map[b->type];
1350 if (_ret)
1351 *_ret = 0;
1352 return make_pair(parent_bucket_type, parent_id);
1353 }
1354 }
1355
1356 if (_ret)
1357 *_ret = -ENOENT;
1358
1359 return pair<string, string>();
1360}
1361
1362int CrushWrapper::get_immediate_parent_id(int id, int *parent) const
1363{
1364 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1365 crush_bucket *b = crush->buckets[bidx];
1366 if (b == 0)
1367 continue;
c07f9fc5 1368 if (is_shadow_item(b->id))
224ce89b 1369 continue;
7c673cae
FG
1370 for (unsigned i = 0; i < b->size; i++) {
1371 if (b->items[i] == id) {
1372 *parent = b->id;
1373 return 0;
1374 }
1375 }
1376 }
1377 return -ENOENT;
1378}
1379
31f18b77
FG
1380int CrushWrapper::get_parent_of_type(int item, int type) const
1381{
1382 do {
1383 int r = get_immediate_parent_id(item, &item);
1384 if (r < 0) {
1385 return 0;
1386 }
1387 } while (get_bucket_type(item) != type);
1388 return item;
1389}
1390
35e4c445
FG
1391int CrushWrapper::rename_class(const string& srcname, const string& dstname)
1392{
1393 auto i = class_rname.find(srcname);
1394 if (i == class_rname.end())
1395 return -ENOENT;
1396 auto j = class_rname.find(dstname);
1397 if (j != class_rname.end())
1398 return -EEXIST;
1399
1400 int class_id = i->second;
1401 assert(class_name.count(class_id));
1402 // rename any shadow buckets of old class name
1403 for (auto &it: class_map) {
1404 if (it.first < 0 && it.second == class_id) {
1405 string old_name = get_item_name(it.first);
1406 size_t pos = old_name.find("~");
1407 assert(pos != string::npos);
1408 string name_no_class = old_name.substr(0, pos);
1409 string old_class_name = old_name.substr(pos + 1);
1410 assert(old_class_name == srcname);
1411 string new_name = name_no_class + "~" + dstname;
1412 // we do not use set_item_name
1413 // because the name is intentionally invalid
1414 name_map[it.first] = new_name;
1415 have_rmaps = false;
1416 }
1417 }
1418
1419 // rename class
1420 class_rname.erase(srcname);
1421 class_name.erase(class_id);
1422 class_rname[dstname] = class_id;
1423 class_name[class_id] = dstname;
1424 return 0;
1425}
1426
d2e6a577
FG
1427int CrushWrapper::populate_classes(
1428 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket)
c07f9fc5 1429{
d2e6a577
FG
1430 // build set of previous used shadow ids
1431 set<int32_t> used_ids;
1432 for (auto& p : old_class_bucket) {
1433 for (auto& q : p.second) {
1434 used_ids.insert(q.second);
c07f9fc5 1435 }
c07f9fc5 1436 }
35e4c445
FG
1437 // accumulate weight values for each carg and bucket as we go. because it is
1438 // depth first, we will have the nested bucket weights we need when we
1439 // finish constructing the containing buckets.
1440 map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> weights
7c673cae 1441 set<int> roots;
c07f9fc5 1442 find_nonshadow_roots(roots);
7c673cae
FG
1443 for (auto &r : roots) {
1444 if (r >= 0)
1445 continue;
7c673cae
FG
1446 for (auto &c : class_name) {
1447 int clone;
d2e6a577 1448 int res = device_class_clone(r, c.first, old_class_bucket, used_ids,
35e4c445 1449 &clone, &cmap_item_weight);
7c673cae
FG
1450 if (res < 0)
1451 return res;
1452 }
1453 }
1454 return 0;
1455}
1456
35e4c445 1457int CrushWrapper::trim_roots_with_class()
7c673cae
FG
1458{
1459 set<int> roots;
c07f9fc5 1460 find_shadow_roots(roots);
7c673cae
FG
1461 for (auto &r : roots) {
1462 if (r >= 0)
1463 continue;
35e4c445 1464 int res = remove_root(r);
7c673cae
FG
1465 if (res)
1466 return res;
1467 }
1468 // there is no need to reweight because we only remove from the
1469 // root and down
1470 return 0;
1471}
1472
224ce89b
WB
1473int32_t CrushWrapper::_alloc_class_id() const {
1474 if (class_name.empty()) {
1475 return 0;
1476 }
1477 int32_t class_id = class_name.rbegin()->first + 1;
1478 if (class_id >= 0) {
1479 return class_id;
1480 }
1481 // wrapped, pick a random start and do exhaustive search
1482 uint32_t upperlimit = numeric_limits<int32_t>::max();
1483 upperlimit++;
1484 class_id = rand() % upperlimit;
1485 const auto start = class_id;
1486 do {
1487 if (!class_name.count(class_id)) {
1488 return class_id;
1489 } else {
1490 class_id++;
1491 if (class_id < 0) {
1492 class_id = 0;
1493 }
1494 }
1495 } while (class_id != start);
1496 assert(0 == "no available class id");
1497}
1498
7c673cae
FG
1499void CrushWrapper::reweight(CephContext *cct)
1500{
1501 set<int> roots;
1502 find_roots(roots);
1503 for (set<int>::iterator p = roots.begin(); p != roots.end(); ++p) {
1504 if (*p >= 0)
1505 continue;
1506 crush_bucket *b = get_bucket(*p);
1507 ldout(cct, 5) << "reweight bucket " << *p << dendl;
1508 int r = crush_reweight_bucket(crush, b);
1509 assert(r == 0);
1510 }
1511}
1512
31f18b77
FG
1513int CrushWrapper::add_simple_rule_at(
1514 string name, string root_name,
1515 string failure_domain_name,
224ce89b 1516 string device_class,
31f18b77 1517 string mode, int rule_type,
224ce89b
WB
1518 int rno,
1519 ostream *err)
7c673cae
FG
1520{
1521 if (rule_exists(name)) {
1522 if (err)
1523 *err << "rule " << name << " exists";
1524 return -EEXIST;
1525 }
1526 if (rno >= 0) {
1527 if (rule_exists(rno)) {
1528 if (err)
1529 *err << "rule with ruleno " << rno << " exists";
1530 return -EEXIST;
1531 }
1532 if (ruleset_exists(rno)) {
1533 if (err)
1534 *err << "ruleset " << rno << " exists";
1535 return -EEXIST;
1536 }
1537 } else {
1538 for (rno = 0; rno < get_max_rules(); rno++) {
1539 if (!rule_exists(rno) && !ruleset_exists(rno))
1540 break;
1541 }
1542 }
1543 if (!name_exists(root_name)) {
1544 if (err)
1545 *err << "root item " << root_name << " does not exist";
1546 return -ENOENT;
1547 }
1548 int root = get_item_id(root_name);
1549 int type = 0;
1550 if (failure_domain_name.length()) {
1551 type = get_type_id(failure_domain_name);
1552 if (type < 0) {
1553 if (err)
1554 *err << "unknown type " << failure_domain_name;
1555 return -EINVAL;
1556 }
1557 }
224ce89b
WB
1558 if (device_class.size()) {
1559 if (!class_exists(device_class)) {
1560 if (err)
1561 *err << "device class " << device_class << " does not exist";
1562 return -EINVAL;
1563 }
1564 int c = get_class_id(device_class);
1565 if (class_bucket.count(root) == 0 ||
1566 class_bucket[root].count(c) == 0) {
1567 if (err)
1568 *err << "root " << root_name << " has no devices with class "
1569 << device_class;
1570 return -EINVAL;
1571 }
1572 root = class_bucket[root][c];
1573 }
7c673cae
FG
1574 if (mode != "firstn" && mode != "indep") {
1575 if (err)
1576 *err << "unknown mode " << mode;
1577 return -EINVAL;
1578 }
1579
1580 int steps = 3;
1581 if (mode == "indep")
1582 steps = 5;
1583 int min_rep = mode == "firstn" ? 1 : 3;
1584 int max_rep = mode == "firstn" ? 10 : 20;
1585 //set the ruleset the same as rule_id(rno)
1586 crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_rep, max_rep);
1587 assert(rule);
1588 int step = 0;
1589 if (mode == "indep") {
1590 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
1591 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
1592 }
1593 crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
1594 if (type)
1595 crush_rule_set_step(rule, step++,
1596 mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
1597 CRUSH_RULE_CHOOSELEAF_INDEP,
1598 CRUSH_CHOOSE_N,
1599 type);
1600 else
1601 crush_rule_set_step(rule, step++,
1602 mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
1603 CRUSH_RULE_CHOOSE_INDEP,
1604 CRUSH_CHOOSE_N,
1605 0);
1606 crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
1607
1608 int ret = crush_add_rule(crush, rule, rno);
1609 if(ret < 0) {
1610 *err << "failed to add rule " << rno << " because " << cpp_strerror(ret);
1611 return ret;
1612 }
1613 set_rule_name(rno, name);
1614 have_rmaps = false;
1615 return rno;
1616}
1617
31f18b77
FG
1618int CrushWrapper::add_simple_rule(
1619 string name, string root_name,
1620 string failure_domain_name,
224ce89b 1621 string device_class,
31f18b77
FG
1622 string mode, int rule_type,
1623 ostream *err)
7c673cae 1624{
224ce89b
WB
1625 return add_simple_rule_at(name, root_name, failure_domain_name, device_class,
1626 mode,
31f18b77 1627 rule_type, -1, err);
7c673cae
FG
1628}
1629
1630int CrushWrapper::get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap)
1631{
1632 if (ruleno >= crush->max_rules)
1633 return -ENOENT;
1634 if (crush->rules[ruleno] == NULL)
1635 return -ENOENT;
1636 crush_rule *rule = crush->rules[ruleno];
1637
1638 // build a weight map for each TAKE in the rule, and then merge them
31f18b77
FG
1639
1640 // FIXME: if there are multiple takes that place a different number of
1641 // objects we do not take that into account. (Also, note that doing this
1642 // right is also a function of the pool, since the crush rule
1643 // might choose 2 + choose 2 but pool size may only be 3.)
7c673cae
FG
1644 for (unsigned i=0; i<rule->len; ++i) {
1645 map<int,float> m;
1646 float sum = 0;
1647 if (rule->steps[i].op == CRUSH_RULE_TAKE) {
1648 int n = rule->steps[i].arg1;
1649 if (n >= 0) {
1650 m[n] = 1.0;
1651 sum = 1.0;
1652 } else {
1653 list<int> q;
1654 q.push_back(n);
1655 //breadth first iterate the OSD tree
1656 while (!q.empty()) {
1657 int bno = q.front();
1658 q.pop_front();
1659 crush_bucket *b = crush->buckets[-1-bno];
1660 assert(b);
1661 for (unsigned j=0; j<b->size; ++j) {
1662 int item_id = b->items[j];
1663 if (item_id >= 0) { //it's an OSD
1664 float w = crush_get_bucket_item_weight(b, j);
1665 m[item_id] = w;
1666 sum += w;
1667 } else { //not an OSD, expand the child later
1668 q.push_back(item_id);
1669 }
1670 }
1671 }
1672 }
1673 }
1674 for (map<int,float>::iterator p = m.begin(); p != m.end(); ++p) {
1675 map<int,float>::iterator q = pmap->find(p->first);
1676 if (q == pmap->end()) {
1677 (*pmap)[p->first] = p->second / sum;
1678 } else {
1679 q->second += p->second / sum;
1680 }
1681 }
1682 }
1683
1684 return 0;
1685}
1686
1687int CrushWrapper::remove_rule(int ruleno)
1688{
1689 if (ruleno >= (int)crush->max_rules)
1690 return -ENOENT;
1691 if (crush->rules[ruleno] == NULL)
1692 return -ENOENT;
1693 crush_destroy_rule(crush->rules[ruleno]);
1694 crush->rules[ruleno] = NULL;
1695 rule_name_map.erase(ruleno);
1696 have_rmaps = false;
b5b8bbf5 1697 return rebuild_roots_with_classes();
7c673cae
FG
1698}
1699
31f18b77
FG
1700int CrushWrapper::bucket_adjust_item_weight(CephContext *cct, crush_bucket *bucket, int item, int weight)
1701{
1702 if (cct->_conf->osd_crush_update_weight_set) {
1703 unsigned position;
1704 for (position = 0; position < bucket->size; position++)
1705 if (bucket->items[position] == item)
1706 break;
1707 assert(position != bucket->size);
b5b8bbf5
FG
1708 for (auto &w : choose_args) {
1709 crush_choose_arg_map &arg_map = w.second;
31f18b77
FG
1710 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
1711 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1712 crush_weight_set *weight_set = &arg->weight_set[j];
1713 weight_set->weights[position] = weight;
1714 }
1715 }
1716 }
1717 return crush_bucket_adjust_item_weight(crush, bucket, item, weight);
1718}
1719
c07f9fc5
FG
1720int CrushWrapper::add_bucket(
1721 int bucketno, int alg, int hash, int type, int size,
1722 int *items, int *weights, int *idout)
1723{
1724 if (alg == 0) {
1725 alg = get_default_bucket_alg();
1726 if (alg == 0)
1727 return -EINVAL;
1728 }
1729 crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items,
1730 weights);
1731 assert(b);
b5b8bbf5 1732 assert(idout);
c07f9fc5 1733 int r = crush_add_bucket(crush, bucketno, b, idout);
b5b8bbf5 1734 int pos = -1 - *idout;
c07f9fc5
FG
1735 for (auto& p : choose_args) {
1736 crush_choose_arg_map& cmap = p.second;
1737 if (cmap.args) {
b5b8bbf5 1738 if ((int)cmap.size <= pos) {
c07f9fc5
FG
1739 cmap.args = (crush_choose_arg*)realloc(
1740 cmap.args,
b5b8bbf5
FG
1741 sizeof(crush_choose_arg) * (pos + 1));
1742 assert(cmap.args);
c07f9fc5 1743 memset(&cmap.args[cmap.size], 0,
b5b8bbf5
FG
1744 sizeof(crush_choose_arg) * (pos + 1 - cmap.size));
1745 cmap.size = pos + 1;
c07f9fc5
FG
1746 }
1747 } else {
1748 cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg),
b5b8bbf5
FG
1749 pos + 1);
1750 assert(cmap.args);
1751 cmap.size = pos + 1;
c07f9fc5
FG
1752 }
1753 if (size > 0) {
1754 int positions = get_choose_args_positions(cmap);
b5b8bbf5 1755 crush_choose_arg& carg = cmap.args[pos];
c07f9fc5
FG
1756 carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set),
1757 size);
1758 carg.weight_set_size = positions;
1759 for (int ppos = 0; ppos < positions; ++ppos) {
1760 carg.weight_set[ppos].weights = (__u32*)calloc(sizeof(__u32), size);
1761 carg.weight_set[ppos].size = size;
1762 for (int bpos = 0; bpos < size; ++bpos) {
1763 carg.weight_set[ppos].weights[bpos] = weights[bpos];
1764 }
1765 }
1766 }
1767 }
1768 return r;
1769}
1770
31f18b77
FG
1771int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight)
1772{
1773 __u32 new_size = bucket->size + 1;
35e4c445
FG
1774 int r = crush_bucket_add_item(crush, bucket, item, weight);
1775 if (r < 0) {
1776 return r;
1777 }
b5b8bbf5
FG
1778 for (auto &w : choose_args) {
1779 crush_choose_arg_map &arg_map = w.second;
31f18b77
FG
1780 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
1781 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1782 crush_weight_set *weight_set = &arg->weight_set[j];
c07f9fc5
FG
1783 weight_set->weights = (__u32*)realloc(weight_set->weights,
1784 new_size * sizeof(__u32));
31f18b77
FG
1785 assert(weight_set->size + 1 == new_size);
1786 weight_set->weights[weight_set->size] = weight;
1787 weight_set->size = new_size;
1788 }
1789 if (arg->ids_size) {
224ce89b 1790 arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
31f18b77
FG
1791 assert(arg->ids_size + 1 == new_size);
1792 arg->ids[arg->ids_size] = item;
1793 arg->ids_size = new_size;
1794 }
1795 }
35e4c445 1796 return 0;
31f18b77
FG
1797}
1798
1799int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
1800{
1801 __u32 new_size = bucket->size - 1;
1802 unsigned position;
1803 for (position = 0; position < bucket->size; position++)
1804 if (bucket->items[position] == item)
1805 break;
1806 assert(position != bucket->size);
35e4c445
FG
1807 int r = crush_bucket_remove_item(crush, bucket, item);
1808 if (r < 0) {
1809 return r;
1810 }
b5b8bbf5
FG
1811 for (auto &w : choose_args) {
1812 crush_choose_arg_map &arg_map = w.second;
31f18b77
FG
1813 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
1814 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1815 crush_weight_set *weight_set = &arg->weight_set[j];
1816 assert(weight_set->size - 1 == new_size);
1817 for (__u32 k = position; k < new_size; k++)
1818 weight_set->weights[k] = weight_set->weights[k+1];
35e4c445
FG
1819 if (new_size) {
1820 weight_set->weights = (__u32*)realloc(weight_set->weights,
1821 new_size * sizeof(__u32));
1822 } else {
1823 weight_set->weights = NULL;
1824 }
31f18b77
FG
1825 weight_set->size = new_size;
1826 }
1827 if (arg->ids_size) {
1828 assert(arg->ids_size - 1 == new_size);
1829 for (__u32 k = position; k < new_size; k++)
1830 arg->ids[k] = arg->ids[k+1];
35e4c445
FG
1831 if (new_size) {
1832 arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
1833 } else {
1834 arg->ids = NULL;
1835 }
31f18b77
FG
1836 arg->ids_size = new_size;
1837 }
1838 }
35e4c445 1839 return 0;
31f18b77
FG
1840}
1841
224ce89b
WB
1842int CrushWrapper::update_device_class(int id,
1843 const string& class_name,
1844 const string& name,
1845 ostream *ss)
7c673cae 1846{
c07f9fc5
FG
1847 assert(item_exists(id));
1848 auto old_class_name = get_item_class(id);
1849 if (old_class_name && old_class_name != class_name) {
1850 *ss << "osd." << id << " has already bound to class '" << old_class_name
1851 << "', can not reset class to '" << class_name << "'; "
1852 << "use 'ceph osd crush rm-device-class <osd>' to "
1853 << "remove old class first";
1854 return -EBUSY;
1855 }
1856
224ce89b 1857 int class_id = get_or_create_class_id(class_name);
7c673cae 1858 if (id < 0) {
224ce89b 1859 *ss << name << " id " << id << " is negative";
7c673cae
FG
1860 return -EINVAL;
1861 }
7c673cae
FG
1862
1863 if (class_map.count(id) != 0 && class_map[id] == class_id) {
224ce89b 1864 *ss << name << " already set to class " << class_name;
7c673cae
FG
1865 return 0;
1866 }
1867
1868 set_item_class(id, class_id);
1869
1870 int r = rebuild_roots_with_classes();
1871 if (r < 0)
1872 return r;
1873 return 1;
1874}
1875
c07f9fc5
FG
1876int CrushWrapper::remove_device_class(CephContext *cct, int id, ostream *ss)
1877{
1878 assert(ss);
1879 const char *name = get_item_name(id);
1880 if (!name) {
1881 *ss << "osd." << id << " does not have a name";
1882 return -ENOENT;
1883 }
1884
1885 const char *class_name = get_item_class(id);
1886 if (!class_name) {
1887 *ss << "osd." << id << " has not been bound to a specific class yet";
1888 return 0;
1889 }
1890 class_remove_item(id);
1891
c07f9fc5
FG
1892 int r = rebuild_roots_with_classes();
1893 if (r < 0) {
1894 *ss << "unable to rebuild roots with class '" << class_name << "' "
1895 << "of osd." << id << ": " << cpp_strerror(r);
1896 return r;
1897 }
1898 return 0;
1899}
1900
d2e6a577
FG
1901int CrushWrapper::device_class_clone(
1902 int original_id, int device_class,
1903 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
1904 const std::set<int32_t>& used_ids,
35e4c445
FG
1905 int *clone,
1906 map<int,map<int,vector<int>>> *cmap_item_weight)
7c673cae
FG
1907{
1908 const char *item_name = get_item_name(original_id);
1909 if (item_name == NULL)
1910 return -ECHILD;
1911 const char *class_name = get_class_name(device_class);
1912 if (class_name == NULL)
1913 return -EBADF;
1914 string copy_name = item_name + string("~") + class_name;
1915 if (name_exists(copy_name)) {
1916 *clone = get_item_id(copy_name);
1917 return 0;
1918 }
35e4c445 1919
7c673cae 1920 crush_bucket *original = get_bucket(original_id);
d2e6a577 1921 assert(!IS_ERR(original));
7c673cae
FG
1922 crush_bucket *copy = crush_make_bucket(crush,
1923 original->alg,
1924 original->hash,
1925 original->type,
1926 0, NULL, NULL);
d2e6a577 1927 assert(copy);
35e4c445
FG
1928
1929 vector<unsigned> item_orig_pos; // new item pos -> orig item pos
7c673cae
FG
1930 for (unsigned i = 0; i < original->size; i++) {
1931 int item = original->items[i];
1932 int weight = crush_get_bucket_item_weight(original, i);
1933 if (item >= 0) {
1934 if (class_map.count(item) != 0 && class_map[item] == device_class) {
35e4c445 1935 int res = crush_bucket_add_item(crush, copy, item, weight);
7c673cae
FG
1936 if (res)
1937 return res;
35e4c445
FG
1938 } else {
1939 continue;
7c673cae
FG
1940 }
1941 } else {
1942 int child_copy_id;
d2e6a577 1943 int res = device_class_clone(item, device_class, old_class_bucket,
35e4c445
FG
1944 used_ids, &child_copy_id,
1945 cmap_item_weight);
7c673cae
FG
1946 if (res < 0)
1947 return res;
1948 crush_bucket *child_copy = get_bucket(child_copy_id);
d2e6a577 1949 assert(!IS_ERR(child_copy));
35e4c445
FG
1950 res = crush_bucket_add_item(crush, copy, child_copy_id,
1951 child_copy->weight);
7c673cae
FG
1952 if (res)
1953 return res;
1954 }
35e4c445 1955 item_orig_pos.push_back(i);
7c673cae 1956 }
35e4c445
FG
1957 assert(item_orig_pos.size() == copy->size);
1958
d2e6a577
FG
1959 int bno = 0;
1960 if (old_class_bucket.count(original_id) &&
1961 old_class_bucket.at(original_id).count(device_class)) {
1962 bno = old_class_bucket.at(original_id).at(device_class);
1963 } else {
1964 // pick a new shadow bucket id that is not used by the current map
1965 // *or* any previous shadow buckets.
1966 bno = -1;
1967 while (((-1-bno) < crush->max_buckets && crush->buckets[-1-bno]) ||
1968 used_ids.count(bno)) {
1969 --bno;
1970 }
1971 }
1972 int res = crush_add_bucket(crush, bno, copy, clone);
7c673cae
FG
1973 if (res)
1974 return res;
d2e6a577 1975 assert(!bno || bno == *clone);
35e4c445 1976
7c673cae
FG
1977 res = set_item_class(*clone, device_class);
1978 if (res < 0)
1979 return res;
35e4c445 1980
7c673cae
FG
1981 // we do not use set_item_name because the name is intentionally invalid
1982 name_map[*clone] = copy_name;
1983 if (have_rmaps)
1984 name_rmap[copy_name] = *clone;
1985 class_bucket[original_id][device_class] = *clone;
35e4c445
FG
1986
1987 // set up choose_args for the new bucket.
1988 for (auto& w : choose_args) {
1989 crush_choose_arg_map& cmap = w.second;
1990 if (-1-bno >= (int)cmap.size) {
1991 unsigned new_size = -1-bno + 1;
1992 cmap.args = (crush_choose_arg*)realloc(cmap.args,
1993 new_size * sizeof(cmap.args[0]));
b5b8bbf5 1994 assert(cmap.args);
35e4c445
FG
1995 memset(cmap.args + cmap.size, 0,
1996 (new_size - cmap.size) * sizeof(cmap.args[0]));
b5b8bbf5 1997 cmap.size = new_size;
35e4c445
FG
1998 }
1999 auto& o = cmap.args[-1-original_id];
2000 auto& n = cmap.args[-1-bno];
2001 n.ids_size = 0; // FIXME: implement me someday
2002 n.weight_set_size = o.weight_set_size;
2003 n.weight_set = (crush_weight_set*)calloc(
2004 n.weight_set_size, sizeof(crush_weight_set));
2005 for (size_t s = 0; s < n.weight_set_size; ++s) {
2006 n.weight_set[s].size = copy->size;
2007 n.weight_set[s].weights = (__u32*)calloc(copy->size, sizeof(__u32));
2008 }
2009 for (size_t s = 0; s < n.weight_set_size; ++s) {
2010 vector<int> bucket_weights(n.weight_set_size);
2011 for (size_t i = 0; i < copy->size; ++i) {
2012 int item = copy->items[i];
2013 if (item >= 0) {
2014 n.weight_set[s].weights[i] = o.weight_set[s].weights[item_orig_pos[i]];
2015 } else {
2016 n.weight_set[s].weights[i] = (*cmap_item_weight)[w.first][item][s];
2017 }
2018 bucket_weights[s] += n.weight_set[s].weights[i];
2019 }
2020 (*cmap_item_weight)[w.first][bno] = bucket_weights;
2021 }
2022 }
7c673cae
FG
2023 return 0;
2024}
2025
b5b8bbf5
FG
2026int CrushWrapper::get_rules_by_class(const string &class_name, set<int> *rules)
2027{
2028 assert(rules);
2029 rules->clear();
2030 if (!class_exists(class_name)) {
2031 return -ENOENT;
2032 }
2033 int class_id = get_class_id(class_name);
2034 for (unsigned i = 0; i < crush->max_rules; ++i) {
2035 crush_rule *r = crush->rules[i];
2036 if (!r)
2037 continue;
2038 for (unsigned j = 0; j < r->len; ++j) {
2039 if (r->steps[j].op == CRUSH_RULE_TAKE) {
2040 int step_item = r->steps[j].arg1;
2041 int original_item;
2042 int c;
2043 int res = split_id_class(step_item, &original_item, &c);
2044 if (res < 0) {
2045 return res;
2046 }
2047 if (c != -1 && c == class_id) {
2048 rules->insert(i);
2049 break;
2050 }
2051 }
2052 }
2053 }
2054 return 0;
2055}
2056
d2e6a577
FG
2057bool CrushWrapper::_class_is_dead(int class_id)
2058{
2059 for (auto &p: class_map) {
2060 if (p.first >= 0 && p.second == class_id) {
2061 return false;
2062 }
2063 }
2064 for (unsigned i = 0; i < crush->max_rules; ++i) {
2065 crush_rule *r = crush->rules[i];
2066 if (!r)
2067 continue;
2068 for (unsigned j = 0; j < r->len; ++j) {
2069 if (r->steps[j].op == CRUSH_RULE_TAKE) {
2070 int root = r->steps[j].arg1;
2071 for (auto &p : class_bucket) {
2072 auto& q = p.second;
2073 if (q.count(class_id) && q[class_id] == root) {
2074 return false;
2075 }
2076 }
2077 }
2078 }
2079 }
2080 // no more referenced by any devices or crush rules
2081 return true;
2082}
2083
2084void CrushWrapper::cleanup_dead_classes()
2085{
35e4c445
FG
2086 auto p = class_name.begin();
2087 while (p != class_name.end()) {
2088 if (_class_is_dead(p->first)) {
2089 string n = p->second;
2090 ++p;
2091 remove_class_name(n);
2092 } else {
2093 ++p;
2094 }
d2e6a577
FG
2095 }
2096}
2097
7c673cae
FG
2098int CrushWrapper::rebuild_roots_with_classes()
2099{
d2e6a577
FG
2100 std::map<int32_t, map<int32_t, int32_t> > old_class_bucket = class_bucket;
2101 cleanup_dead_classes();
35e4c445 2102 int r = trim_roots_with_class();
7c673cae
FG
2103 if (r < 0)
2104 return r;
d2e6a577
FG
2105 class_bucket.clear();
2106 return populate_classes(old_class_bucket);
7c673cae
FG
2107}
2108
2109void CrushWrapper::encode(bufferlist& bl, uint64_t features) const
2110{
2111 assert(crush);
2112
2113 __u32 magic = CRUSH_MAGIC;
2114 ::encode(magic, bl);
2115
2116 ::encode(crush->max_buckets, bl);
2117 ::encode(crush->max_rules, bl);
2118 ::encode(crush->max_devices, bl);
2119
31f18b77
FG
2120 bool encode_compat_choose_args = false;
2121 crush_choose_arg_map arg_map;
2122 memset(&arg_map, '\0', sizeof(arg_map));
2123 if (has_choose_args() &&
2124 !HAVE_FEATURE(features, CRUSH_CHOOSE_ARGS)) {
2125 assert(!has_incompat_choose_args());
2126 encode_compat_choose_args = true;
2127 arg_map = choose_args.begin()->second;
2128 }
2129
7c673cae
FG
2130 // buckets
2131 for (int i=0; i<crush->max_buckets; i++) {
2132 __u32 alg = 0;
2133 if (crush->buckets[i]) alg = crush->buckets[i]->alg;
2134 ::encode(alg, bl);
2135 if (!alg)
2136 continue;
2137
2138 ::encode(crush->buckets[i]->id, bl);
2139 ::encode(crush->buckets[i]->type, bl);
2140 ::encode(crush->buckets[i]->alg, bl);
2141 ::encode(crush->buckets[i]->hash, bl);
2142 ::encode(crush->buckets[i]->weight, bl);
2143 ::encode(crush->buckets[i]->size, bl);
2144 for (unsigned j=0; j<crush->buckets[i]->size; j++)
2145 ::encode(crush->buckets[i]->items[j], bl);
2146
2147 switch (crush->buckets[i]->alg) {
2148 case CRUSH_BUCKET_UNIFORM:
2149 ::encode((reinterpret_cast<crush_bucket_uniform*>(crush->buckets[i]))->item_weight, bl);
2150 break;
2151
2152 case CRUSH_BUCKET_LIST:
2153 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
2154 ::encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->item_weights[j], bl);
2155 ::encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->sum_weights[j], bl);
2156 }
2157 break;
2158
2159 case CRUSH_BUCKET_TREE:
2160 ::encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes, bl);
2161 for (unsigned j=0; j<(reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes; j++)
2162 ::encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->node_weights[j], bl);
2163 break;
2164
2165 case CRUSH_BUCKET_STRAW:
2166 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
2167 ::encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->item_weights[j], bl);
2168 ::encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->straws[j], bl);
2169 }
2170 break;
2171
2172 case CRUSH_BUCKET_STRAW2:
31f18b77
FG
2173 {
2174 __u32 *weights;
2175 if (encode_compat_choose_args &&
2176 arg_map.args[i].weight_set_size > 0) {
2177 weights = arg_map.args[i].weight_set[0].weights;
2178 } else {
2179 weights = (reinterpret_cast<crush_bucket_straw2*>(crush->buckets[i]))->item_weights;
2180 }
2181 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
2182 ::encode(weights[j], bl);
2183 }
7c673cae
FG
2184 }
2185 break;
2186
2187 default:
2188 ceph_abort();
2189 break;
2190 }
2191 }
2192
2193 // rules
2194 for (unsigned i=0; i<crush->max_rules; i++) {
2195 __u32 yes = crush->rules[i] ? 1:0;
2196 ::encode(yes, bl);
2197 if (!yes)
2198 continue;
2199
2200 ::encode(crush->rules[i]->len, bl);
2201 ::encode(crush->rules[i]->mask, bl);
2202 for (unsigned j=0; j<crush->rules[i]->len; j++)
2203 ::encode(crush->rules[i]->steps[j], bl);
2204 }
2205
2206 // name info
2207 ::encode(type_map, bl);
2208 ::encode(name_map, bl);
2209 ::encode(rule_name_map, bl);
2210
2211 // tunables
2212 ::encode(crush->choose_local_tries, bl);
2213 ::encode(crush->choose_local_fallback_tries, bl);
2214 ::encode(crush->choose_total_tries, bl);
2215 ::encode(crush->chooseleaf_descend_once, bl);
2216 ::encode(crush->chooseleaf_vary_r, bl);
2217 ::encode(crush->straw_calc_version, bl);
2218 ::encode(crush->allowed_bucket_algs, bl);
2219 if (features & CEPH_FEATURE_CRUSH_TUNABLES5) {
2220 ::encode(crush->chooseleaf_stable, bl);
2221 }
2222
2223 if (HAVE_FEATURE(features, SERVER_LUMINOUS)) {
2224 // device classes
2225 ::encode(class_map, bl);
2226 ::encode(class_name, bl);
2227 ::encode(class_bucket, bl);
2228
c07f9fc5 2229 // choose args
224ce89b
WB
2230 __u32 size = (__u32)choose_args.size();
2231 ::encode(size, bl);
7c673cae
FG
2232 for (auto c : choose_args) {
2233 ::encode(c.first, bl);
2234 crush_choose_arg_map arg_map = c.second;
224ce89b 2235 size = 0;
7c673cae
FG
2236 for (__u32 i = 0; i < arg_map.size; i++) {
2237 crush_choose_arg *arg = &arg_map.args[i];
2238 if (arg->weight_set_size == 0 &&
2239 arg->ids_size == 0)
2240 continue;
2241 size++;
2242 }
2243 ::encode(size, bl);
2244 for (__u32 i = 0; i < arg_map.size; i++) {
2245 crush_choose_arg *arg = &arg_map.args[i];
2246 if (arg->weight_set_size == 0 &&
2247 arg->ids_size == 0)
2248 continue;
2249 ::encode(i, bl);
2250 ::encode(arg->weight_set_size, bl);
2251 for (__u32 j = 0; j < arg->weight_set_size; j++) {
2252 crush_weight_set *weight_set = &arg->weight_set[j];
2253 ::encode(weight_set->size, bl);
2254 for (__u32 k = 0; k < weight_set->size; k++)
2255 ::encode(weight_set->weights[k], bl);
2256 }
2257 ::encode(arg->ids_size, bl);
2258 for (__u32 j = 0; j < arg->ids_size; j++)
2259 ::encode(arg->ids[j], bl);
2260 }
2261 }
2262 }
2263}
2264
2265static void decode_32_or_64_string_map(map<int32_t,string>& m, bufferlist::iterator& blp)
2266{
2267 m.clear();
2268 __u32 n;
2269 ::decode(n, blp);
2270 while (n--) {
2271 __s32 key;
2272 ::decode(key, blp);
2273
2274 __u32 strlen;
2275 ::decode(strlen, blp);
2276 if (strlen == 0) {
2277 // der, key was actually 64-bits!
2278 ::decode(strlen, blp);
2279 }
2280 ::decode_nohead(strlen, m[key], blp);
2281 }
2282}
2283
2284void CrushWrapper::decode(bufferlist::iterator& blp)
2285{
2286 create();
2287
2288 __u32 magic;
2289 ::decode(magic, blp);
2290 if (magic != CRUSH_MAGIC)
2291 throw buffer::malformed_input("bad magic number");
2292
2293 ::decode(crush->max_buckets, blp);
2294 ::decode(crush->max_rules, blp);
2295 ::decode(crush->max_devices, blp);
2296
2297 // legacy tunables, unless we decode something newer
2298 set_tunables_legacy();
2299
2300 try {
2301 // buckets
2302 crush->buckets = (crush_bucket**)calloc(1, crush->max_buckets * sizeof(crush_bucket*));
2303 for (int i=0; i<crush->max_buckets; i++) {
2304 decode_crush_bucket(&crush->buckets[i], blp);
2305 }
2306
2307 // rules
2308 crush->rules = (crush_rule**)calloc(1, crush->max_rules * sizeof(crush_rule*));
2309 for (unsigned i = 0; i < crush->max_rules; ++i) {
2310 __u32 yes;
2311 ::decode(yes, blp);
2312 if (!yes) {
2313 crush->rules[i] = NULL;
2314 continue;
2315 }
2316
2317 __u32 len;
2318 ::decode(len, blp);
2319 crush->rules[i] = reinterpret_cast<crush_rule*>(calloc(1, crush_rule_size(len)));
2320 crush->rules[i]->len = len;
2321 ::decode(crush->rules[i]->mask, blp);
2322 for (unsigned j=0; j<crush->rules[i]->len; j++)
2323 ::decode(crush->rules[i]->steps[j], blp);
2324 }
2325
2326 // name info
2327 // NOTE: we had a bug where we were incoding int instead of int32, which means the
2328 // 'key' field for these maps may be either 32 or 64 bits, depending. tolerate
2329 // both by assuming the string is always non-empty.
2330 decode_32_or_64_string_map(type_map, blp);
2331 decode_32_or_64_string_map(name_map, blp);
2332 decode_32_or_64_string_map(rule_name_map, blp);
2333
2334 // tunables
2335 if (!blp.end()) {
2336 ::decode(crush->choose_local_tries, blp);
2337 ::decode(crush->choose_local_fallback_tries, blp);
2338 ::decode(crush->choose_total_tries, blp);
2339 }
2340 if (!blp.end()) {
2341 ::decode(crush->chooseleaf_descend_once, blp);
2342 }
2343 if (!blp.end()) {
2344 ::decode(crush->chooseleaf_vary_r, blp);
2345 }
2346 if (!blp.end()) {
2347 ::decode(crush->straw_calc_version, blp);
2348 }
2349 if (!blp.end()) {
2350 ::decode(crush->allowed_bucket_algs, blp);
2351 }
2352 if (!blp.end()) {
2353 ::decode(crush->chooseleaf_stable, blp);
2354 }
2355 if (!blp.end()) {
2356 ::decode(class_map, blp);
2357 ::decode(class_name, blp);
2358 for (auto &c : class_name)
2359 class_rname[c.second] = c.first;
2360 ::decode(class_bucket, blp);
7c673cae
FG
2361 }
2362 if (!blp.end()) {
224ce89b 2363 __u32 choose_args_size;
7c673cae 2364 ::decode(choose_args_size, blp);
224ce89b 2365 for (__u32 i = 0; i < choose_args_size; i++) {
b5b8bbf5 2366 typename decltype(choose_args)::key_type choose_args_index;
7c673cae
FG
2367 ::decode(choose_args_index, blp);
2368 crush_choose_arg_map arg_map;
2369 arg_map.size = crush->max_buckets;
c07f9fc5
FG
2370 arg_map.args = (crush_choose_arg*)calloc(
2371 arg_map.size, sizeof(crush_choose_arg));
7c673cae
FG
2372 __u32 size;
2373 ::decode(size, blp);
2374 for (__u32 j = 0; j < size; j++) {
2375 __u32 bucket_index;
2376 ::decode(bucket_index, blp);
2377 assert(bucket_index < arg_map.size);
2378 crush_choose_arg *arg = &arg_map.args[bucket_index];
2379 ::decode(arg->weight_set_size, blp);
c07f9fc5
FG
2380 if (arg->weight_set_size) {
2381 arg->weight_set = (crush_weight_set*)calloc(
2382 arg->weight_set_size, sizeof(crush_weight_set));
2383 for (__u32 k = 0; k < arg->weight_set_size; k++) {
2384 crush_weight_set *weight_set = &arg->weight_set[k];
2385 ::decode(weight_set->size, blp);
2386 weight_set->weights = (__u32*)calloc(
2387 weight_set->size, sizeof(__u32));
2388 for (__u32 l = 0; l < weight_set->size; l++)
2389 ::decode(weight_set->weights[l], blp);
2390 }
7c673cae
FG
2391 }
2392 ::decode(arg->ids_size, blp);
c07f9fc5
FG
2393 if (arg->ids_size) {
2394 assert(arg->ids_size == crush->buckets[bucket_index]->size);
2395 arg->ids = (__s32 *)calloc(arg->ids_size, sizeof(__s32));
2396 for (__u32 k = 0; k < arg->ids_size; k++)
2397 ::decode(arg->ids[k], blp);
2398 }
7c673cae
FG
2399 }
2400 choose_args[choose_args_index] = arg_map;
2401 }
2402 }
2403 finalize();
2404 }
2405 catch (...) {
2406 crush_destroy(crush);
2407 throw;
2408 }
2409}
2410
2411void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp)
2412{
2413 __u32 alg;
2414 ::decode(alg, blp);
2415 if (!alg) {
2416 *bptr = NULL;
2417 return;
2418 }
2419
2420 int size = 0;
2421 switch (alg) {
2422 case CRUSH_BUCKET_UNIFORM:
2423 size = sizeof(crush_bucket_uniform);
2424 break;
2425 case CRUSH_BUCKET_LIST:
2426 size = sizeof(crush_bucket_list);
2427 break;
2428 case CRUSH_BUCKET_TREE:
2429 size = sizeof(crush_bucket_tree);
2430 break;
2431 case CRUSH_BUCKET_STRAW:
2432 size = sizeof(crush_bucket_straw);
2433 break;
2434 case CRUSH_BUCKET_STRAW2:
2435 size = sizeof(crush_bucket_straw2);
2436 break;
2437 default:
2438 {
2439 char str[128];
2440 snprintf(str, sizeof(str), "unsupported bucket algorithm: %d", alg);
2441 throw buffer::malformed_input(str);
2442 }
2443 }
2444 crush_bucket *bucket = reinterpret_cast<crush_bucket*>(calloc(1, size));
2445 *bptr = bucket;
2446
2447 ::decode(bucket->id, blp);
2448 ::decode(bucket->type, blp);
2449 ::decode(bucket->alg, blp);
2450 ::decode(bucket->hash, blp);
2451 ::decode(bucket->weight, blp);
2452 ::decode(bucket->size, blp);
2453
2454 bucket->items = (__s32*)calloc(1, bucket->size * sizeof(__s32));
2455 for (unsigned j = 0; j < bucket->size; ++j) {
2456 ::decode(bucket->items[j], blp);
2457 }
2458
2459 switch (bucket->alg) {
2460 case CRUSH_BUCKET_UNIFORM:
2461 ::decode((reinterpret_cast<crush_bucket_uniform*>(bucket))->item_weight, blp);
2462 break;
2463
2464 case CRUSH_BUCKET_LIST: {
2465 crush_bucket_list* cbl = reinterpret_cast<crush_bucket_list*>(bucket);
2466 cbl->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2467 cbl->sum_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2468
2469 for (unsigned j = 0; j < bucket->size; ++j) {
2470 ::decode(cbl->item_weights[j], blp);
2471 ::decode(cbl->sum_weights[j], blp);
2472 }
2473 break;
2474 }
2475
2476 case CRUSH_BUCKET_TREE: {
2477 crush_bucket_tree* cbt = reinterpret_cast<crush_bucket_tree*>(bucket);
2478 ::decode(cbt->num_nodes, blp);
2479 cbt->node_weights = (__u32*)calloc(1, cbt->num_nodes * sizeof(__u32));
2480 for (unsigned j=0; j<cbt->num_nodes; j++) {
2481 ::decode(cbt->node_weights[j], blp);
2482 }
2483 break;
2484 }
2485
2486 case CRUSH_BUCKET_STRAW: {
2487 crush_bucket_straw* cbs = reinterpret_cast<crush_bucket_straw*>(bucket);
2488 cbs->straws = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2489 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2490 for (unsigned j = 0; j < bucket->size; ++j) {
2491 ::decode(cbs->item_weights[j], blp);
2492 ::decode(cbs->straws[j], blp);
2493 }
2494 break;
2495 }
2496
2497 case CRUSH_BUCKET_STRAW2: {
2498 crush_bucket_straw2* cbs = reinterpret_cast<crush_bucket_straw2*>(bucket);
2499 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2500 for (unsigned j = 0; j < bucket->size; ++j) {
2501 ::decode(cbs->item_weights[j], blp);
2502 }
2503 break;
2504 }
2505
2506 default:
2507 // We should have handled this case in the first switch statement
2508 ceph_abort();
2509 break;
2510 }
2511}
2512
2513
2514void CrushWrapper::dump(Formatter *f) const
2515{
2516 f->open_array_section("devices");
2517 for (int i=0; i<get_max_devices(); i++) {
2518 f->open_object_section("device");
2519 f->dump_int("id", i);
2520 const char *n = get_item_name(i);
2521 if (n) {
2522 f->dump_string("name", n);
2523 } else {
2524 char name[20];
2525 sprintf(name, "device%d", i);
2526 f->dump_string("name", name);
2527 }
2528 const char *device_class = get_item_class(i);
2529 if (device_class != NULL)
2530 f->dump_string("class", device_class);
2531 f->close_section();
2532 }
2533 f->close_section();
2534
2535 f->open_array_section("types");
2536 int n = get_num_type_names();
2537 for (int i=0; n; i++) {
2538 const char *name = get_type_name(i);
2539 if (!name) {
2540 if (i == 0) {
2541 f->open_object_section("type");
2542 f->dump_int("type_id", 0);
2543 f->dump_string("name", "device");
2544 f->close_section();
2545 }
2546 continue;
2547 }
2548 n--;
2549 f->open_object_section("type");
2550 f->dump_int("type_id", i);
2551 f->dump_string("name", name);
2552 f->close_section();
2553 }
2554 f->close_section();
2555
2556 f->open_array_section("buckets");
2557 for (int bucket = -1; bucket > -1-get_max_buckets(); --bucket) {
2558 if (!bucket_exists(bucket))
2559 continue;
2560 f->open_object_section("bucket");
2561 f->dump_int("id", bucket);
2562 if (get_item_name(bucket))
2563 f->dump_string("name", get_item_name(bucket));
2564 f->dump_int("type_id", get_bucket_type(bucket));
2565 if (get_type_name(get_bucket_type(bucket)))
2566 f->dump_string("type_name", get_type_name(get_bucket_type(bucket)));
2567 f->dump_int("weight", get_bucket_weight(bucket));
2568 f->dump_string("alg", crush_bucket_alg_name(get_bucket_alg(bucket)));
2569 f->dump_string("hash", crush_hash_name(get_bucket_hash(bucket)));
2570 f->open_array_section("items");
2571 for (int j=0; j<get_bucket_size(bucket); j++) {
2572 f->open_object_section("item");
2573 f->dump_int("id", get_bucket_item(bucket, j));
2574 f->dump_int("weight", get_bucket_item_weight(bucket, j));
2575 f->dump_int("pos", j);
2576 f->close_section();
2577 }
2578 f->close_section();
2579 f->close_section();
2580 }
2581 f->close_section();
2582
2583 f->open_array_section("rules");
2584 dump_rules(f);
2585 f->close_section();
2586
2587 f->open_object_section("tunables");
2588 dump_tunables(f);
2589 f->close_section();
2590
2591 dump_choose_args(f);
2592}
2593
2594namespace {
2595 // depth first walker
2596 class TreeDumper {
2597 typedef CrushTreeDumper::Item Item;
2598 const CrushWrapper *crush;
c07f9fc5 2599 const CrushTreeDumper::name_map_t& weight_set_names;
7c673cae 2600 public:
c07f9fc5
FG
2601 explicit TreeDumper(const CrushWrapper *crush,
2602 const CrushTreeDumper::name_map_t& wsnames)
2603 : crush(crush), weight_set_names(wsnames) {}
7c673cae
FG
2604
2605 void dump(Formatter *f) {
2606 set<int> roots;
2607 crush->find_roots(roots);
2608 for (set<int>::iterator root = roots.begin(); root != roots.end(); ++root) {
c07f9fc5 2609 dump_item(Item(*root, 0, 0, crush->get_bucket_weightf(*root)), f);
7c673cae
FG
2610 }
2611 }
2612
2613 private:
2614 void dump_item(const Item& qi, Formatter* f) {
2615 if (qi.is_bucket()) {
2616 f->open_object_section("bucket");
c07f9fc5 2617 CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f);
7c673cae
FG
2618 dump_bucket_children(qi, f);
2619 f->close_section();
2620 } else {
2621 f->open_object_section("device");
c07f9fc5 2622 CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f);
7c673cae
FG
2623 f->close_section();
2624 }
2625 }
2626
2627 void dump_bucket_children(const Item& parent, Formatter* f) {
2628 f->open_array_section("items");
2629 const int max_pos = crush->get_bucket_size(parent.id);
2630 for (int pos = 0; pos < max_pos; pos++) {
2631 int id = crush->get_bucket_item(parent.id, pos);
2632 float weight = crush->get_bucket_item_weightf(parent.id, pos);
c07f9fc5 2633 dump_item(Item(id, parent.id, parent.depth + 1, weight), f);
7c673cae
FG
2634 }
2635 f->close_section();
2636 }
2637 };
2638}
2639
c07f9fc5
FG
2640void CrushWrapper::dump_tree(
2641 Formatter *f,
2642 const CrushTreeDumper::name_map_t& weight_set_names) const
7c673cae
FG
2643{
2644 assert(f);
c07f9fc5 2645 TreeDumper(this, weight_set_names).dump(f);
7c673cae
FG
2646}
2647
2648void CrushWrapper::dump_tunables(Formatter *f) const
2649{
2650 f->dump_int("choose_local_tries", get_choose_local_tries());
2651 f->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries());
2652 f->dump_int("choose_total_tries", get_choose_total_tries());
2653 f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
2654 f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
2655 f->dump_int("chooseleaf_stable", get_chooseleaf_stable());
2656 f->dump_int("straw_calc_version", get_straw_calc_version());
2657 f->dump_int("allowed_bucket_algs", get_allowed_bucket_algs());
2658
2659 // be helpful about it
2660 if (has_jewel_tunables())
2661 f->dump_string("profile", "jewel");
2662 else if (has_hammer_tunables())
2663 f->dump_string("profile", "hammer");
2664 else if (has_firefly_tunables())
2665 f->dump_string("profile", "firefly");
2666 else if (has_bobtail_tunables())
2667 f->dump_string("profile", "bobtail");
2668 else if (has_argonaut_tunables())
2669 f->dump_string("profile", "argonaut");
2670 else
2671 f->dump_string("profile", "unknown");
2672 f->dump_int("optimal_tunables", (int)has_optimal_tunables());
2673 f->dump_int("legacy_tunables", (int)has_legacy_tunables());
2674
2675 // be helpful about minimum version required
2676 f->dump_string("minimum_required_version", get_min_required_version());
2677
2678 f->dump_int("require_feature_tunables", (int)has_nondefault_tunables());
2679 f->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2());
2680 f->dump_int("has_v2_rules", (int)has_v2_rules());
2681 f->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3());
2682 f->dump_int("has_v3_rules", (int)has_v3_rules());
2683 f->dump_int("has_v4_buckets", (int)has_v4_buckets());
2684 f->dump_int("require_feature_tunables5", (int)has_nondefault_tunables5());
2685 f->dump_int("has_v5_rules", (int)has_v5_rules());
2686}
2687
2688void CrushWrapper::dump_choose_args(Formatter *f) const
2689{
2690 f->open_object_section("choose_args");
2691 for (auto c : choose_args) {
2692 crush_choose_arg_map arg_map = c.second;
2693 f->open_array_section(stringify(c.first).c_str());
2694 for (__u32 i = 0; i < arg_map.size; i++) {
2695 crush_choose_arg *arg = &arg_map.args[i];
2696 if (arg->weight_set_size == 0 &&
2697 arg->ids_size == 0)
2698 continue;
2699 f->open_object_section("choose_args");
2700 int bucket_index = i;
2701 f->dump_int("bucket_id", -1-bucket_index);
2702 if (arg->weight_set_size > 0) {
2703 f->open_array_section("weight_set");
2704 for (__u32 j = 0; j < arg->weight_set_size; j++) {
2705 f->open_array_section("weights");
2706 __u32 *weights = arg->weight_set[j].weights;
2707 __u32 size = arg->weight_set[j].size;
2708 for (__u32 k = 0; k < size; k++) {
2709 f->dump_float("weight", (float)weights[k]/(float)0x10000);
2710 }
2711 f->close_section();
2712 }
2713 f->close_section();
2714 }
2715 if (arg->ids_size > 0) {
2716 f->open_array_section("ids");
2717 for (__u32 j = 0; j < arg->ids_size; j++)
2718 f->dump_int("id", arg->ids[j]);
2719 f->close_section();
2720 }
2721 f->close_section();
2722 }
2723 f->close_section();
2724 }
2725 f->close_section();
2726}
2727
2728void CrushWrapper::dump_rules(Formatter *f) const
2729{
2730 for (int i=0; i<get_max_rules(); i++) {
2731 if (!rule_exists(i))
2732 continue;
2733 dump_rule(i, f);
2734 }
2735}
2736
2737void CrushWrapper::dump_rule(int ruleset, Formatter *f) const
2738{
2739 f->open_object_section("rule");
2740 f->dump_int("rule_id", ruleset);
2741 if (get_rule_name(ruleset))
2742 f->dump_string("rule_name", get_rule_name(ruleset));
2743 f->dump_int("ruleset", get_rule_mask_ruleset(ruleset));
2744 f->dump_int("type", get_rule_mask_type(ruleset));
2745 f->dump_int("min_size", get_rule_mask_min_size(ruleset));
2746 f->dump_int("max_size", get_rule_mask_max_size(ruleset));
2747 f->open_array_section("steps");
2748 for (int j=0; j<get_rule_len(ruleset); j++) {
2749 f->open_object_section("step");
2750 switch (get_rule_op(ruleset, j)) {
2751 case CRUSH_RULE_NOOP:
2752 f->dump_string("op", "noop");
2753 break;
2754 case CRUSH_RULE_TAKE:
2755 f->dump_string("op", "take");
2756 {
2757 int item = get_rule_arg1(ruleset, j);
2758 f->dump_int("item", item);
2759
2760 const char *name = get_item_name(item);
2761 f->dump_string("item_name", name ? name : "");
2762 }
2763 break;
2764 case CRUSH_RULE_EMIT:
2765 f->dump_string("op", "emit");
2766 break;
2767 case CRUSH_RULE_CHOOSE_FIRSTN:
2768 f->dump_string("op", "choose_firstn");
2769 f->dump_int("num", get_rule_arg1(ruleset, j));
2770 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2771 break;
2772 case CRUSH_RULE_CHOOSE_INDEP:
2773 f->dump_string("op", "choose_indep");
2774 f->dump_int("num", get_rule_arg1(ruleset, j));
2775 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2776 break;
2777 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
2778 f->dump_string("op", "chooseleaf_firstn");
2779 f->dump_int("num", get_rule_arg1(ruleset, j));
2780 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2781 break;
2782 case CRUSH_RULE_CHOOSELEAF_INDEP:
2783 f->dump_string("op", "chooseleaf_indep");
2784 f->dump_int("num", get_rule_arg1(ruleset, j));
2785 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2786 break;
2787 case CRUSH_RULE_SET_CHOOSE_TRIES:
2788 f->dump_string("op", "set_choose_tries");
2789 f->dump_int("num", get_rule_arg1(ruleset, j));
2790 break;
2791 case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
2792 f->dump_string("op", "set_chooseleaf_tries");
2793 f->dump_int("num", get_rule_arg1(ruleset, j));
2794 break;
2795 default:
2796 f->dump_int("opcode", get_rule_op(ruleset, j));
2797 f->dump_int("arg1", get_rule_arg1(ruleset, j));
2798 f->dump_int("arg2", get_rule_arg2(ruleset, j));
2799 }
2800 f->close_section();
2801 }
2802 f->close_section();
2803 f->close_section();
2804}
2805
2806void CrushWrapper::list_rules(Formatter *f) const
2807{
2808 for (int rule = 0; rule < get_max_rules(); rule++) {
2809 if (!rule_exists(rule))
2810 continue;
2811 f->dump_string("name", get_rule_name(rule));
2812 }
2813}
2814
c07f9fc5
FG
2815void CrushWrapper::list_rules(ostream *ss) const
2816{
2817 for (int rule = 0; rule < get_max_rules(); rule++) {
2818 if (!rule_exists(rule))
2819 continue;
2820 *ss << get_rule_name(rule) << "\n";
2821 }
2822}
7c673cae 2823
c07f9fc5
FG
2824class CrushTreePlainDumper : public CrushTreeDumper::Dumper<TextTable> {
2825public:
2826 typedef CrushTreeDumper::Dumper<TextTable> Parent;
2827
2828 explicit CrushTreePlainDumper(const CrushWrapper *crush,
2829 const CrushTreeDumper::name_map_t& wsnames)
2830 : Parent(crush, wsnames) {}
2831 explicit CrushTreePlainDumper(const CrushWrapper *crush,
2832 const CrushTreeDumper::name_map_t& wsnames,
2833 bool show_shadow)
2834 : Parent(crush, wsnames, show_shadow) {}
2835
2836
2837 void dump(TextTable *tbl) {
2838 tbl->define_column("ID", TextTable::LEFT, TextTable::RIGHT);
d2e6a577 2839 tbl->define_column("CLASS", TextTable::LEFT, TextTable::RIGHT);
c07f9fc5
FG
2840 tbl->define_column("WEIGHT", TextTable::LEFT, TextTable::RIGHT);
2841 for (auto& p : crush->choose_args) {
2842 if (p.first == CrushWrapper::DEFAULT_CHOOSE_ARGS) {
2843 tbl->define_column("(compat)", TextTable::LEFT, TextTable::RIGHT);
2844 } else {
2845 string name;
2846 auto q = weight_set_names.find(p.first);
2847 name = q != weight_set_names.end() ? q->second :
2848 stringify(p.first);
2849 tbl->define_column(name.c_str(), TextTable::LEFT, TextTable::RIGHT);
2850 }
2851 }
2852 tbl->define_column("TYPE NAME", TextTable::LEFT, TextTable::LEFT);
2853 Parent::dump(tbl);
7c673cae
FG
2854 }
2855
2856protected:
c07f9fc5 2857 void dump_item(const CrushTreeDumper::Item &qi, TextTable *tbl) override {
d2e6a577
FG
2858 const char *c = crush->get_item_class(qi.id);
2859 if (!c)
2860 c = "";
c07f9fc5 2861 *tbl << qi.id
d2e6a577 2862 << c
c07f9fc5
FG
2863 << weightf_t(qi.weight);
2864 for (auto& p : crush->choose_args) {
2865 if (qi.parent < 0) {
2866 const crush_choose_arg_map cmap = crush->choose_args_get(p.first);
2867 int bidx = -1 - qi.parent;
2868 const crush_bucket *b = crush->get_bucket(qi.parent);
2869 if (b &&
2870 bidx < (int)cmap.size &&
2871 cmap.args[bidx].weight_set &&
2872 cmap.args[bidx].weight_set_size >= 1) {
2873 int pos;
2874 for (pos = 0;
2875 pos < (int)cmap.args[bidx].weight_set[0].size &&
2876 b->items[pos] != qi.id;
2877 ++pos) ;
2878 *tbl << weightf_t((float)cmap.args[bidx].weight_set[0].weights[pos] /
2879 (float)0x10000);
2880 continue;
2881 }
2882 }
2883 *tbl << "";
7c673cae 2884 }
c07f9fc5
FG
2885 ostringstream ss;
2886 for (int k=0; k < qi.depth; k++) {
2887 ss << " ";
7c673cae 2888 }
c07f9fc5
FG
2889 if (qi.is_bucket()) {
2890 ss << crush->get_type_name(crush->get_bucket_type(qi.id)) << " "
2891 << crush->get_item_name(qi.id);
2892 } else {
2893 ss << "osd." << qi.id;
2894 }
2895 *tbl << ss.str();
2896 *tbl << TextTable::endrow;
7c673cae
FG
2897 }
2898};
2899
2900
2901class CrushTreeFormattingDumper : public CrushTreeDumper::FormattingDumper {
2902public:
2903 typedef CrushTreeDumper::FormattingDumper Parent;
2904
c07f9fc5
FG
2905 explicit CrushTreeFormattingDumper(
2906 const CrushWrapper *crush,
2907 const CrushTreeDumper::name_map_t& wsnames)
2908 : Parent(crush, wsnames) {}
2909
2910 explicit CrushTreeFormattingDumper(
2911 const CrushWrapper *crush,
2912 const CrushTreeDumper::name_map_t& wsnames,
2913 bool show_shadow)
2914 : Parent(crush, wsnames, show_shadow) {}
7c673cae
FG
2915
2916 void dump(Formatter *f) {
2917 f->open_array_section("nodes");
2918 Parent::dump(f);
2919 f->close_section();
2920 f->open_array_section("stray");
2921 f->close_section();
2922 }
2923};
2924
2925
c07f9fc5
FG
2926void CrushWrapper::dump_tree(
2927 ostream *out,
2928 Formatter *f,
2929 const CrushTreeDumper::name_map_t& weight_set_names,
2930 bool show_shadow) const
7c673cae 2931{
c07f9fc5
FG
2932 if (out) {
2933 TextTable tbl;
2934 CrushTreePlainDumper(this, weight_set_names, show_shadow).dump(&tbl);
2935 *out << tbl;
2936 }
2937 if (f) {
2938 CrushTreeFormattingDumper(this, weight_set_names, show_shadow).dump(f);
2939 }
7c673cae
FG
2940}
2941
2942void CrushWrapper::generate_test_instances(list<CrushWrapper*>& o)
2943{
2944 o.push_back(new CrushWrapper);
2945 // fixme
2946}
2947
7c673cae
FG
2948/**
2949 * Determine the default CRUSH ruleset ID to be used with
2950 * newly created replicated pools.
2951 *
2952 * @returns a ruleset ID (>=0) or -1 if no suitable ruleset found
2953 */
2954int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext *cct)
2955{
31f18b77
FG
2956 int crush_ruleset = cct->_conf->osd_pool_default_crush_rule;
2957 if (crush_ruleset < 0) {
7c673cae
FG
2958 crush_ruleset = find_first_ruleset(pg_pool_t::TYPE_REPLICATED);
2959 } else if (!ruleset_exists(crush_ruleset)) {
2960 crush_ruleset = -1; // match find_first_ruleset() retval
2961 }
7c673cae
FG
2962 return crush_ruleset;
2963}
2964
2965bool CrushWrapper::is_valid_crush_name(const string& s)
2966{
2967 if (s.empty())
2968 return false;
2969 for (string::const_iterator p = s.begin(); p != s.end(); ++p) {
2970 if (!(*p == '-') &&
2971 !(*p == '_') &&
2972 !(*p == '.') &&
2973 !(*p >= '0' && *p <= '9') &&
2974 !(*p >= 'A' && *p <= 'Z') &&
2975 !(*p >= 'a' && *p <= 'z'))
2976 return false;
2977 }
2978 return true;
2979}
2980
2981bool CrushWrapper::is_valid_crush_loc(CephContext *cct,
2982 const map<string,string>& loc)
2983{
2984 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
2985 if (!is_valid_crush_name(l->first) ||
2986 !is_valid_crush_name(l->second)) {
2987 ldout(cct, 1) << "loc["
2988 << l->first << "] = '"
2989 << l->second << "' not a valid crush name ([A-Za-z0-9_-.]+)"
2990 << dendl;
2991 return false;
2992 }
2993 }
2994 return true;
2995}
2996
// Re-derive a mapping level by level for one stack of (type, fan-out)
// choices, substituting underfull devices for overfull ones at the leaf
// level.
//
//  stack     - (type, fan-out) pairs, outermost level first
//  overfull  - devices we would like to move data away from
//  underfull - candidate replacement devices, tried in the given order
//  orig      - the original mapping being remapped
//  i         - cursor into orig; advanced as leaf positions are consumed
//  used      - replacement devices already handed out; updated here
//  pw        - in: working set to start from; out: the resulting set
//
// Always returns 0.
2997 int CrushWrapper::_choose_type_stack(
2998 CephContext *cct,
2999 const vector<pair<int,int>>& stack,
3000 const set<int>& overfull,
3001 const vector<int>& underfull,
3002 const vector<int>& orig,
3003 vector<int>::const_iterator& i,
3004 set<int>& used,
3005 vector<int> *pw) const
3006 {
3007 vector<int> w = *pw;
// NOTE(review): this outer 'o' is shadowed by the per-level 'o' declared
// inside the loop over stack below and does not appear to be used.
3008 vector<int> o;
3009
3010 ldout(cct, 10) << __func__ << " stack " << stack
3011 << " orig " << orig
3012 << " at " << *i
3013 << " pw " << *pw
3014 << dendl;
3015
// cumulative_fanout[j] is the product of the fan-outs of all levels below
// level j, i.e. how many entries of orig one choice at level j covers.
3016 vector<int> cumulative_fanout(stack.size());
3017 int f = 1;
3018 for (int j = (int)stack.size() - 1; j >= 0; --j) {
3019 cumulative_fanout[j] = f;
3020 f *= stack[j].second;
3021 }
3022 ldout(cct, 10) << __func__ << " cumulative_fanout " << cumulative_fanout
3023 << dendl;
3024
31f18b77
FG
3025 // identify underfull targets for each intermediate level.
3026 // this serves two purposes:
3027 // 1. we can tell when we are selecting a bucket that does not have any underfull
3028 // devices beneath it. that means that if the current input includes an overfull
3029 // device, we won't be able to find an underfull device with this parent to
3030 // swap for it.
3031 // 2. when we decide we should reject a bucket due to the above, this list gives us
3032 // a list of peers to consider that *do* have underfull devices available.. (we
3033 // are careful to pick one that has the same parent.)
3034 vector<set<int>> underfull_buckets; // level -> set of buckets with >0 underfull item(s)
// NOTE(review): stack.size() - 1 is computed in an unsigned type; this
// presumably relies on callers never passing an empty stack -- confirm.
3035 underfull_buckets.resize(stack.size() - 1);
3036 for (auto osd : underfull) {
3037 int item = osd;
// walk upward from the device, recording its ancestor at every
// intermediate level (the last stack level is skipped)
3038 for (int j = (int)stack.size() - 2; j >= 0; --j) {
3039 int type = stack[j].first;
3040 item = get_parent_of_type(item, type);
3041 ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type
3042 << " is " << item << dendl;
3043 underfull_buckets[j].insert(item);
3044 }
3045 }
3046 ldout(cct, 20) << __func__ << " underfull_buckets " << underfull_buckets << dendl;
3047
7c673cae
FG
// process the stack one level at a time; the output 'o' of a level becomes
// the working set 'w' of the next one
3048 for (unsigned j = 0; j < stack.size(); ++j) {
3049 int type = stack[j].first;
3050 int fanout = stack[j].second;
3051 int cum_fanout = cumulative_fanout[j];
3052 ldout(cct, 10) << " level " << j << ": type " << type << " fanout " << fanout
3053 << " cumulative " << cum_fanout
3054 << " w " << w << dendl;
3055 vector<int> o;
// tmpi is a scratch copy of the cursor used by the non-leaf branch, so
// that 'i' itself is only advanced at the leaf level
3056 auto tmpi = i;
3057 for (auto from : w) {
3058 ldout(cct, 10) << " from " << from << dendl;
31f18b77
FG
3059 // identify leaves under each choice. we use this to check whether any of these
3060 // leaves are overfull. (if so, we need to make sure there are underfull candidates
3061 // to swap for them.)
3062 vector<set<int>> leaves;
3063 leaves.resize(fanout);
7c673cae
FG
3064 for (int pos = 0; pos < fanout; ++pos) {
3065 if (type > 0) {
3066 // non-leaf
31f18b77 3067 int item = get_parent_of_type(*tmpi, type);
7c673cae 3068 o.push_back(item);
7c673cae 3069 int n = cum_fanout;
31f18b77
FG
3070 while (n-- && tmpi != orig.end()) {
3071 leaves[pos].insert(*tmpi++);
3072 }
// NOTE(review): tmpi may equal orig.end() after the loop above, in which
// case the *tmpi in this log statement (and in get_parent_of_type() on the
// next pos) dereferences the end iterator -- confirm.
3073 ldout(cct, 10) << __func__ << " from " << *tmpi << " got " << item
3074 << " of type " << type << " over leaves " << leaves[pos] << dendl;
7c673cae
FG
3075 } else {
3076 // leaf
3077 bool replaced = false;
3078 if (overfull.count(*i)) {
// take the first underfull device that has not been handed out yet, lives
// under 'from', and is not already part of the original mapping
3079 for (auto item : underfull) {
3080 ldout(cct, 10) << __func__ << " pos " << pos
3081 << " was " << *i << " considering " << item
3082 << dendl;
3083 if (used.count(item)) {
3084 ldout(cct, 20) << __func__ << " in used " << used << dendl;
3085 continue;
3086 }
3087 if (!subtree_contains(from, item)) {
3088 ldout(cct, 20) << __func__ << " not in subtree " << from << dendl;
3089 continue;
3090 }
3091 if (std::find(orig.begin(), orig.end(), item) != orig.end()) {
3092 ldout(cct, 20) << __func__ << " in orig " << orig << dendl;
3093 continue;
3094 }
3095 o.push_back(item);
3096 used.insert(item);
3097 ldout(cct, 10) << __func__ << " pos " << pos << " replace "
3098 << *i << " -> " << item << dendl;
3099 replaced = true;
3100 ++i;
3101 break;
3102 }
3103 }
// no replacement was made (not overfull, or no candidate): keep the
// original device
3104 if (!replaced) {
3105 ldout(cct, 10) << __func__ << " pos " << pos << " keep " << *i
3106 << dendl;
3107 o.push_back(*i);
3108 ++i;
3109 }
3110 if (i == orig.end()) {
3111 ldout(cct, 10) << __func__ << " end of orig, break 1" << dendl;
3112 break;
3113 }
3114 }
3115 }
31f18b77
FG
3116 if (j + 1 < stack.size()) {
3117 // check if any buckets have overfull leaves but no underfull candidates
// NOTE(review): 'o' accumulates across every 'from' in w, while o[pos]
// here indexes from the start of 'o' -- confirm this is intended when w
// holds more than one entry.
3118 for (int pos = 0; pos < fanout; ++pos) {
3119 if (underfull_buckets[j].count(o[pos]) == 0) {
3120 // are any leaves overfull?
3121 bool any_overfull = false;
3122 for (auto osd : leaves[pos]) {
3123 if (overfull.count(osd)) {
3124 any_overfull = true;
3125 }
3126 }
3127 if (any_overfull) {
3128 ldout(cct, 10) << " bucket " << o[pos] << " has no underfull targets and "
3129 << ">0 leaves " << leaves[pos] << " is overfull; alts "
3130 << underfull_buckets[j]
3131 << dendl;
// swap in the first alternative bucket that is not already chosen and
// (below the first level) shares the same parent at the previous level
3132 for (auto alt : underfull_buckets[j]) {
3133 if (std::find(o.begin(), o.end(), alt) == o.end()) {
3134 // see if alt has the same parent
3135 if (j == 0 ||
3136 get_parent_of_type(o[pos], stack[j-1].first) ==
3137 get_parent_of_type(alt, stack[j-1].first)) {
3138 if (j)
3139 ldout(cct, 10) << " replacing " << o[pos]
3140 << " (which has no underfull leaves) with " << alt
3141 << " (same parent "
3142 << get_parent_of_type(alt, stack[j-1].first) << " type "
3143 << type << ")" << dendl;
3144 else
3145 ldout(cct, 10) << " replacing " << o[pos]
3146 << " (which has no underfull leaves) with " << alt
3147 << " (first level)" << dendl;
3148 o[pos] = alt;
3149 break;
3150 } else {
3151 ldout(cct, 30) << " alt " << alt << " for " << o[pos]
3152 << " has different parent, skipping" << dendl;
3153 }
3154 }
3155 }
3156 }
3157 }
3158 }
3159 }
7c673cae
FG
3160 if (i == orig.end()) {
3161 ldout(cct, 10) << __func__ << " end of orig, break 2" << dendl;
3162 break;
3163 }
3164 }
3165 ldout(cct, 10) << __func__ << " w <- " << o << " was " << w << dendl;
3166 w.swap(o);
3167 }
// hand the final working set back to the caller
3168 *pw = w;
3169 return 0;
3170 }
3171
3172int CrushWrapper::try_remap_rule(
3173 CephContext *cct,
3174 int ruleno,
3175 int maxout,
3176 const set<int>& overfull,
3177 const vector<int>& underfull,
3178 const vector<int>& orig,
3179 vector<int> *out) const
3180{
3181 const crush_map *map = crush;
3182 const crush_rule *rule = get_rule(ruleno);
3183 assert(rule);
3184
3185 ldout(cct, 10) << __func__ << " ruleno " << ruleno
3186 << " numrep " << maxout << " overfull " << overfull
3187 << " underfull " << underfull << " orig " << orig
3188 << dendl;
3189 vector<int> w; // working set
3190 out->clear();
3191
3192 auto i = orig.begin();
3193 set<int> used;
3194
3195 vector<pair<int,int>> type_stack; // (type, fan-out)
3196
3197 for (unsigned step = 0; step < rule->len; ++step) {
3198 const crush_rule_step *curstep = &rule->steps[step];
3199 ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl;
3200 switch (curstep->op) {
3201 case CRUSH_RULE_TAKE:
3202 if ((curstep->arg1 >= 0 && curstep->arg1 < map->max_devices) ||
3203 (-1-curstep->arg1 >= 0 && -1-curstep->arg1 < map->max_buckets &&
3204 map->buckets[-1-curstep->arg1])) {
3205 w.clear();
3206 w.push_back(curstep->arg1);
3207 ldout(cct, 10) << __func__ << " take " << w << dendl;
3208 } else {
3209 ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl;
3210 }
3211 break;
3212
3213 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
3214 case CRUSH_RULE_CHOOSELEAF_INDEP:
3215 {
3216 int numrep = curstep->arg1;
3217 int type = curstep->arg2;
3218 if (numrep <= 0)
3219 numrep += maxout;
3220 type_stack.push_back(make_pair(type, numrep));
3221 type_stack.push_back(make_pair(0, 1));
3222 int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
3223 i, used, &w);
3224 if (r < 0)
3225 return r;
3226 type_stack.clear();
3227 }
3228 break;
3229
3230 case CRUSH_RULE_CHOOSE_FIRSTN:
3231 case CRUSH_RULE_CHOOSE_INDEP:
3232 {
3233 int numrep = curstep->arg1;
3234 int type = curstep->arg2;
3235 if (numrep <= 0)
3236 numrep += maxout;
3237 type_stack.push_back(make_pair(type, numrep));
3238 }
3239 break;
3240
3241 case CRUSH_RULE_EMIT:
3242 ldout(cct, 10) << " emit " << w << dendl;
3243 if (!type_stack.empty()) {
3244 int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
3245 i, used, &w);
3246 if (r < 0)
3247 return r;
3248 type_stack.clear();
3249 }
3250 for (auto item : w) {
3251 out->push_back(item);
3252 }
3253 w.clear();
3254 break;
3255
3256 default:
3257 // ignore
3258 break;
3259 }
3260 }
3261
3262 return 0;
3263}
c07f9fc5
FG
3264
3265
3266int CrushWrapper::_choose_args_adjust_item_weight_in_bucket(
3267 CephContext *cct,
3268 crush_choose_arg_map cmap,
3269 int bucketid,
3270 int id,
3271 const vector<int>& weight,
3272 ostream *ss)
3273{
3274 int changed = 0;
3275 int bidx = -1 - bucketid;
3276 crush_bucket *b = crush->buckets[bidx];
3277 if (bidx >= (int)cmap.size) {
3278 if (ss)
3279 *ss << "no weight-set for bucket " << b->id;
3280 ldout(cct, 10) << __func__ << " no crush_choose_arg for bucket " << b->id
3281 << dendl;
3282 return 0;
3283 }
3284 crush_choose_arg *carg = &cmap.args[bidx];
3285 if (carg->weight_set == NULL) {
3286 if (ss)
3287 *ss << "no weight-set for bucket " << b->id;
3288 ldout(cct, 10) << __func__ << " no weight_set for bucket " << b->id
3289 << dendl;
3290 return 0;
3291 }
3292 if (carg->weight_set_size != weight.size()) {
3293 if (ss)
3294 *ss << "weight_set_size != " << weight.size() << " for bucket " << b->id;
3295 ldout(cct, 10) << __func__ << " weight_set_size != " << weight.size()
3296 << " for bucket " << b->id << dendl;
3297 return 0;
3298 }
3299 for (unsigned i = 0; i < b->size; i++) {
3300 if (b->items[i] == id) {
3301 for (unsigned j = 0; j < weight.size(); ++j) {
3302 carg->weight_set[j].weights[i] = weight[j];
3303 }
3304 ldout(cct, 5) << __func__ << " set " << id << " to " << weight
3305 << " in bucket " << b->id << dendl;
3306 changed++;
3307 }
3308 }
3309 if (changed) {
3310 vector<int> bucket_weight(weight.size(), 0);
3311 for (unsigned i = 0; i < b->size; i++) {
3312 for (unsigned j = 0; j < weight.size(); ++j) {
3313 bucket_weight[j] += carg->weight_set[j].weights[i];
3314 }
3315 }
3316 choose_args_adjust_item_weight(cct, cmap, b->id, bucket_weight, nullptr);
3317 }
3318 return changed;
3319}
3320
3321int CrushWrapper::choose_args_adjust_item_weight(
3322 CephContext *cct,
3323 crush_choose_arg_map cmap,
3324 int id,
3325 const vector<int>& weight,
3326 ostream *ss)
3327{
3328 ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl;
3329 int changed = 0;
3330 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
3331 crush_bucket *b = crush->buckets[bidx];
3332 if (b == nullptr) {
3333 continue;
3334 }
3335 changed += _choose_args_adjust_item_weight_in_bucket(
3336 cct, cmap, b->id, id, weight, ss);
3337 }
3338 if (!changed) {
3339 if (ss)
3340 *ss << "item " << id << " not found in crush map";
3341 return -ENOENT;
3342 }
3343 return changed;
3344}