]> git.proxmox.com Git - ceph.git/blob - ceph/src/crush/CrushWrapper.cc
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / crush / CrushWrapper.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "osd/osd_types.h"
5 #include "common/debug.h"
6 #include "common/Formatter.h"
7 #include "common/errno.h"
8 #include "common/TextTable.h"
9 #include "include/stringify.h"
10
11 #include "CrushWrapper.h"
12 #include "CrushTreeDumper.h"
13
14 #define dout_subsys ceph_subsys_crush
15
16 bool CrushWrapper::has_legacy_rule_ids() const
17 {
18 for (unsigned i=0; i<crush->max_rules; i++) {
19 crush_rule *r = crush->rules[i];
20 if (r &&
21 r->mask.ruleset != i) {
22 return true;
23 }
24 }
25 return false;
26 }
27
28 std::map<int, int> CrushWrapper::renumber_rules()
29 {
30 std::map<int, int> result;
31 for (unsigned i=0; i<crush->max_rules; i++) {
32 crush_rule *r = crush->rules[i];
33 if (r && r->mask.ruleset != i) {
34 result[r->mask.ruleset] = i;
35 r->mask.ruleset = i;
36 }
37 }
38 return result;
39 }
40
41 bool CrushWrapper::has_non_straw2_buckets() const
42 {
43 for (int i=0; i<crush->max_buckets; ++i) {
44 crush_bucket *b = crush->buckets[i];
45 if (!b)
46 continue;
47 if (b->alg != CRUSH_BUCKET_STRAW2)
48 return true;
49 }
50 return false;
51 }
52
53 bool CrushWrapper::has_v2_rules() const
54 {
55 for (unsigned i=0; i<crush->max_rules; i++) {
56 if (is_v2_rule(i)) {
57 return true;
58 }
59 }
60 return false;
61 }
62
63 bool CrushWrapper::is_v2_rule(unsigned ruleid) const
64 {
65 // check rule for use of indep or new SET_* rule steps
66 if (ruleid >= crush->max_rules)
67 return false;
68 crush_rule *r = crush->rules[ruleid];
69 if (!r)
70 return false;
71 for (unsigned j=0; j<r->len; j++) {
72 if (r->steps[j].op == CRUSH_RULE_CHOOSE_INDEP ||
73 r->steps[j].op == CRUSH_RULE_CHOOSELEAF_INDEP ||
74 r->steps[j].op == CRUSH_RULE_SET_CHOOSE_TRIES ||
75 r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_TRIES) {
76 return true;
77 }
78 }
79 return false;
80 }
81
82 bool CrushWrapper::has_v3_rules() const
83 {
84 for (unsigned i=0; i<crush->max_rules; i++) {
85 if (is_v3_rule(i)) {
86 return true;
87 }
88 }
89 return false;
90 }
91
92 bool CrushWrapper::is_v3_rule(unsigned ruleid) const
93 {
94 // check rule for use of SET_CHOOSELEAF_VARY_R step
95 if (ruleid >= crush->max_rules)
96 return false;
97 crush_rule *r = crush->rules[ruleid];
98 if (!r)
99 return false;
100 for (unsigned j=0; j<r->len; j++) {
101 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_VARY_R) {
102 return true;
103 }
104 }
105 return false;
106 }
107
108 bool CrushWrapper::has_v4_buckets() const
109 {
110 for (int i=0; i<crush->max_buckets; ++i) {
111 crush_bucket *b = crush->buckets[i];
112 if (!b)
113 continue;
114 if (b->alg == CRUSH_BUCKET_STRAW2)
115 return true;
116 }
117 return false;
118 }
119
120 bool CrushWrapper::has_v5_rules() const
121 {
122 for (unsigned i=0; i<crush->max_rules; i++) {
123 if (is_v5_rule(i)) {
124 return true;
125 }
126 }
127 return false;
128 }
129
130 bool CrushWrapper::is_v5_rule(unsigned ruleid) const
131 {
132 // check rule for use of SET_CHOOSELEAF_STABLE step
133 if (ruleid >= crush->max_rules)
134 return false;
135 crush_rule *r = crush->rules[ruleid];
136 if (!r)
137 return false;
138 for (unsigned j=0; j<r->len; j++) {
139 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_STABLE) {
140 return true;
141 }
142 }
143 return false;
144 }
145
146 bool CrushWrapper::has_choose_args() const
147 {
148 return !choose_args.empty();
149 }
150
151 bool CrushWrapper::has_incompat_choose_args() const
152 {
153 if (choose_args.empty())
154 return false;
155 if (choose_args.size() > 1)
156 return true;
157 if (choose_args.begin()->first != DEFAULT_CHOOSE_ARGS)
158 return true;
159 crush_choose_arg_map arg_map = choose_args.begin()->second;
160 for (__u32 i = 0; i < arg_map.size; i++) {
161 crush_choose_arg *arg = &arg_map.args[i];
162 if (arg->weight_set_positions == 0 &&
163 arg->ids_size == 0)
164 continue;
165 if (arg->weight_set_positions != 1)
166 return true;
167 if (arg->ids_size != 0)
168 return true;
169 }
170 return false;
171 }
172
173 int CrushWrapper::split_id_class(int i, int *idout, int *classout) const
174 {
175 if (!item_exists(i))
176 return -EINVAL;
177 string name = get_item_name(i);
178 size_t pos = name.find("~");
179 if (pos == string::npos) {
180 *idout = i;
181 *classout = -1;
182 return 0;
183 }
184 string name_no_class = name.substr(0, pos);
185 if (!name_exists(name_no_class))
186 return -ENOENT;
187 string class_name = name.substr(pos + 1);
188 if (!class_exists(class_name))
189 return -ENOENT;
190 *idout = get_item_id(name_no_class);
191 *classout = get_class_id(class_name);
192 return 0;
193 }
194
195 int CrushWrapper::can_rename_item(const string& srcname,
196 const string& dstname,
197 ostream *ss) const
198 {
199 if (name_exists(srcname)) {
200 if (name_exists(dstname)) {
201 *ss << "dstname = '" << dstname << "' already exists";
202 return -EEXIST;
203 }
204 if (is_valid_crush_name(dstname)) {
205 return 0;
206 } else {
207 *ss << "dstname = '" << dstname << "' does not match [-_.0-9a-zA-Z]+";
208 return -EINVAL;
209 }
210 } else {
211 if (name_exists(dstname)) {
212 *ss << "srcname = '" << srcname << "' does not exist "
213 << "and dstname = '" << dstname << "' already exists";
214 return -EALREADY;
215 } else {
216 *ss << "srcname = '" << srcname << "' does not exist";
217 return -ENOENT;
218 }
219 }
220 }
221
222 int CrushWrapper::rename_item(const string& srcname,
223 const string& dstname,
224 ostream *ss)
225 {
226 int ret = can_rename_item(srcname, dstname, ss);
227 if (ret < 0)
228 return ret;
229 int oldid = get_item_id(srcname);
230 return set_item_name(oldid, dstname);
231 }
232
233 int CrushWrapper::can_rename_bucket(const string& srcname,
234 const string& dstname,
235 ostream *ss) const
236 {
237 int ret = can_rename_item(srcname, dstname, ss);
238 if (ret)
239 return ret;
240 int srcid = get_item_id(srcname);
241 if (srcid >= 0) {
242 *ss << "srcname = '" << srcname << "' is not a bucket "
243 << "because its id = " << srcid << " is >= 0";
244 return -ENOTDIR;
245 }
246 return 0;
247 }
248
249 int CrushWrapper::rename_bucket(const string& srcname,
250 const string& dstname,
251 ostream *ss)
252 {
253 int ret = can_rename_bucket(srcname, dstname, ss);
254 if (ret < 0)
255 return ret;
256 int oldid = get_item_id(srcname);
257 return set_item_name(oldid, dstname);
258 }
259
260 int CrushWrapper::rename_rule(const string& srcname,
261 const string& dstname,
262 ostream *ss)
263 {
264 if (!rule_exists(srcname)) {
265 if (ss) {
266 *ss << "source rule name '" << srcname << "' does not exist";
267 }
268 return -ENOENT;
269 }
270 if (rule_exists(dstname)) {
271 if (ss) {
272 *ss << "destination rule name '" << dstname << "' already exists";
273 }
274 return -EEXIST;
275 }
276 int rule_id = get_rule_id(srcname);
277 auto it = rule_name_map.find(rule_id);
278 ceph_assert(it != rule_name_map.end());
279 it->second = dstname;
280 if (have_rmaps) {
281 rule_name_rmap.erase(srcname);
282 rule_name_rmap[dstname] = rule_id;
283 }
284 return 0;
285 }
286
287 void CrushWrapper::find_takes(set<int> *roots) const
288 {
289 for (unsigned i=0; i<crush->max_rules; i++) {
290 crush_rule *r = crush->rules[i];
291 if (!r)
292 continue;
293 for (unsigned j=0; j<r->len; j++) {
294 if (r->steps[j].op == CRUSH_RULE_TAKE)
295 roots->insert(r->steps[j].arg1);
296 }
297 }
298 }
299
300 void CrushWrapper::find_takes_by_rule(int rule, set<int> *roots) const
301 {
302 if (rule < 0 || rule >= (int)crush->max_rules)
303 return;
304 crush_rule *r = crush->rules[rule];
305 if (!r)
306 return;
307 for (unsigned i = 0; i < r->len; i++) {
308 if (r->steps[i].op == CRUSH_RULE_TAKE)
309 roots->insert(r->steps[i].arg1);
310 }
311 }
312
313 void CrushWrapper::find_roots(set<int> *roots) const
314 {
315 for (int i = 0; i < crush->max_buckets; i++) {
316 if (!crush->buckets[i])
317 continue;
318 crush_bucket *b = crush->buckets[i];
319 if (!_search_item_exists(b->id))
320 roots->insert(b->id);
321 }
322 }
323
324 bool CrushWrapper::subtree_contains(int root, int item) const
325 {
326 if (root == item)
327 return true;
328
329 if (root >= 0)
330 return false; // root is a leaf
331
332 const crush_bucket *b = get_bucket(root);
333 if (IS_ERR(b))
334 return false;
335
336 for (unsigned j=0; j<b->size; j++) {
337 if (subtree_contains(b->items[j], item))
338 return true;
339 }
340 return false;
341 }
342
343 bool CrushWrapper::_maybe_remove_last_instance(CephContext *cct, int item, bool unlink_only)
344 {
345 // last instance?
346 if (_search_item_exists(item)) {
347 return false;
348 }
349 if (item < 0 && _bucket_is_in_use(item)) {
350 return false;
351 }
352
353 if (item < 0 && !unlink_only) {
354 crush_bucket *t = get_bucket(item);
355 ldout(cct, 5) << "_maybe_remove_last_instance removing bucket " << item << dendl;
356 crush_remove_bucket(crush, t);
357 if (class_bucket.count(item) != 0)
358 class_bucket.erase(item);
359 class_remove_item(item);
360 update_choose_args(cct);
361 }
362 if ((item >= 0 || !unlink_only) && name_map.count(item)) {
363 ldout(cct, 5) << "_maybe_remove_last_instance removing name for item " << item << dendl;
364 name_map.erase(item);
365 have_rmaps = false;
366 if (item >= 0 && !unlink_only) {
367 class_remove_item(item);
368 }
369 }
370 rebuild_roots_with_classes(cct);
371 return true;
372 }
373
374 int CrushWrapper::remove_root(CephContext *cct, int item)
375 {
376 crush_bucket *b = get_bucket(item);
377 if (IS_ERR(b)) {
378 // should be idempotent
379 // e.g.: we use 'crush link' to link same host into
380 // different roots, which as a result can cause different
381 // shadow trees reference same hosts too. This means
382 // we may need to destory the same buckets(hosts, racks, etc.)
383 // multiple times during rebuilding all shadow trees.
384 return 0;
385 }
386
387 for (unsigned n = 0; n < b->size; n++) {
388 if (b->items[n] >= 0)
389 continue;
390 int r = remove_root(cct, b->items[n]);
391 if (r < 0)
392 return r;
393 }
394
395 crush_remove_bucket(crush, b);
396 if (name_map.count(item) != 0) {
397 name_map.erase(item);
398 have_rmaps = false;
399 }
400 if (class_bucket.count(item) != 0)
401 class_bucket.erase(item);
402 class_remove_item(item);
403 update_choose_args(cct);
404 return 0;
405 }
406
407 void CrushWrapper::update_choose_args(CephContext *cct)
408 {
409 for (auto& i : choose_args) {
410 crush_choose_arg_map &arg_map = i.second;
411 assert(arg_map.size == (unsigned)crush->max_buckets);
412 unsigned positions = get_choose_args_positions(arg_map);
413 for (int j = 0; j < crush->max_buckets; ++j) {
414 crush_bucket *b = crush->buckets[j];
415 assert(j < (int)arg_map.size);
416 auto& carg = arg_map.args[j];
417 // strip out choose_args for any buckets that no longer exist
418 if (!b || b->alg != CRUSH_BUCKET_STRAW2) {
419 if (carg.ids) {
420 if (cct)
421 ldout(cct,10) << __func__ << " removing " << i.first << " bucket "
422 << (-1-j) << " ids" << dendl;
423 free(carg.ids);
424 carg.ids = 0;
425 carg.ids_size = 0;
426 }
427 if (carg.weight_set) {
428 if (cct)
429 ldout(cct,10) << __func__ << " removing " << i.first << " bucket "
430 << (-1-j) << " weight_sets" << dendl;
431 for (unsigned p = 0; p < carg.weight_set_positions; ++p) {
432 free(carg.weight_set[p].weights);
433 }
434 free(carg.weight_set);
435 carg.weight_set = 0;
436 carg.weight_set_positions = 0;
437 }
438 continue;
439 }
440 if (carg.weight_set_positions == 0) {
441 continue; // skip it
442 }
443 if (carg.weight_set_positions != positions) {
444 if (cct)
445 lderr(cct) << __func__ << " " << i.first << " bucket "
446 << (-1-j) << " positions " << carg.weight_set_positions
447 << " -> " << positions << dendl;
448 continue; // wth... skip!
449 }
450 // mis-sized weight_sets? this shouldn't ever happen.
451 for (unsigned p = 0; p < positions; ++p) {
452 if (carg.weight_set[p].size != b->size) {
453 if (cct)
454 lderr(cct) << __func__ << " fixing " << i.first << " bucket "
455 << (-1-j) << " position " << p
456 << " size " << carg.weight_set[p].size << " -> "
457 << b->size << dendl;
458 auto old_ws = carg.weight_set[p];
459 carg.weight_set[p].size = b->size;
460 carg.weight_set[p].weights = (__u32*)calloc(b->size, sizeof(__u32));
461 auto max = std::min<unsigned>(old_ws.size, b->size);
462 for (unsigned k = 0; k < max; ++k) {
463 carg.weight_set[p].weights[k] = old_ws.weights[k];
464 }
465 free(old_ws.weights);
466 }
467 }
468 }
469 }
470 }
471
472 int CrushWrapper::remove_item(CephContext *cct, int item, bool unlink_only)
473 {
474 ldout(cct, 5) << "remove_item " << item
475 << (unlink_only ? " unlink_only":"") << dendl;
476
477 int ret = -ENOENT;
478
479 if (item < 0 && !unlink_only) {
480 crush_bucket *t = get_bucket(item);
481 if (IS_ERR(t)) {
482 ldout(cct, 1) << "remove_item bucket " << item << " does not exist"
483 << dendl;
484 return -ENOENT;
485 }
486
487 if (t->size) {
488 ldout(cct, 1) << "remove_item bucket " << item << " has " << t->size
489 << " items, not empty" << dendl;
490 return -ENOTEMPTY;
491 }
492 if (_bucket_is_in_use(item)) {
493 return -EBUSY;
494 }
495 }
496
497 for (int i = 0; i < crush->max_buckets; i++) {
498 if (!crush->buckets[i])
499 continue;
500 crush_bucket *b = crush->buckets[i];
501
502 for (unsigned i=0; i<b->size; ++i) {
503 int id = b->items[i];
504 if (id == item) {
505 ldout(cct, 5) << "remove_item removing item " << item
506 << " from bucket " << b->id << dendl;
507 adjust_item_weight_in_bucket(cct, item, 0, b->id, true);
508 bucket_remove_item(b, item);
509 ret = 0;
510 }
511 }
512 }
513
514 if (_maybe_remove_last_instance(cct, item, unlink_only))
515 ret = 0;
516
517 return ret;
518 }
519
520 bool CrushWrapper::_search_item_exists(int item) const
521 {
522 for (int i = 0; i < crush->max_buckets; i++) {
523 if (!crush->buckets[i])
524 continue;
525 crush_bucket *b = crush->buckets[i];
526 for (unsigned j=0; j<b->size; ++j) {
527 if (b->items[j] == item)
528 return true;
529 }
530 }
531 return false;
532 }
533
534 bool CrushWrapper::_bucket_is_in_use(int item)
535 {
536 for (auto &i : class_bucket)
537 for (auto &j : i.second)
538 if (j.second == item)
539 return true;
540 for (unsigned i = 0; i < crush->max_rules; ++i) {
541 crush_rule *r = crush->rules[i];
542 if (!r)
543 continue;
544 for (unsigned j = 0; j < r->len; ++j) {
545 if (r->steps[j].op == CRUSH_RULE_TAKE) {
546 int step_item = r->steps[j].arg1;
547 int original_item;
548 int c;
549 int res = split_id_class(step_item, &original_item, &c);
550 if (res < 0)
551 return false;
552 if (step_item == item || original_item == item)
553 return true;
554 }
555 }
556 }
557 return false;
558 }
559
560 int CrushWrapper::_remove_item_under(
561 CephContext *cct, int item, int ancestor, bool unlink_only)
562 {
563 ldout(cct, 5) << "_remove_item_under " << item << " under " << ancestor
564 << (unlink_only ? " unlink_only":"") << dendl;
565
566 if (ancestor >= 0) {
567 return -EINVAL;
568 }
569
570 if (!bucket_exists(ancestor))
571 return -EINVAL;
572
573 int ret = -ENOENT;
574
575 crush_bucket *b = get_bucket(ancestor);
576 for (unsigned i=0; i<b->size; ++i) {
577 int id = b->items[i];
578 if (id == item) {
579 ldout(cct, 5) << "_remove_item_under removing item " << item
580 << " from bucket " << b->id << dendl;
581 adjust_item_weight_in_bucket(cct, item, 0, b->id, true);
582 bucket_remove_item(b, item);
583 ret = 0;
584 } else if (id < 0) {
585 int r = remove_item_under(cct, item, id, unlink_only);
586 if (r == 0)
587 ret = 0;
588 }
589 }
590 return ret;
591 }
592
593 int CrushWrapper::remove_item_under(
594 CephContext *cct, int item, int ancestor, bool unlink_only)
595 {
596 ldout(cct, 5) << "remove_item_under " << item << " under " << ancestor
597 << (unlink_only ? " unlink_only":"") << dendl;
598
599 if (!unlink_only && _bucket_is_in_use(item)) {
600 return -EBUSY;
601 }
602
603 int ret = _remove_item_under(cct, item, ancestor, unlink_only);
604 if (ret < 0)
605 return ret;
606
607 if (item < 0 && !unlink_only) {
608 crush_bucket *t = get_bucket(item);
609 if (IS_ERR(t)) {
610 ldout(cct, 1) << "remove_item_under bucket " << item
611 << " does not exist" << dendl;
612 return -ENOENT;
613 }
614
615 if (t->size) {
616 ldout(cct, 1) << "remove_item_under bucket " << item << " has " << t->size
617 << " items, not empty" << dendl;
618 return -ENOTEMPTY;
619 }
620 }
621
622 if (_maybe_remove_last_instance(cct, item, unlink_only))
623 ret = 0;
624
625 return ret;
626 }
627
628 int CrushWrapper::get_common_ancestor_distance(CephContext *cct, int id,
629 const std::multimap<string,string>& loc) const
630 {
631 ldout(cct, 5) << __func__ << " " << id << " " << loc << dendl;
632 if (!item_exists(id))
633 return -ENOENT;
634 map<string,string> id_loc = get_full_location(id);
635 ldout(cct, 20) << " id is at " << id_loc << dendl;
636
637 for (map<int,string>::const_iterator p = type_map.begin();
638 p != type_map.end();
639 ++p) {
640 map<string,string>::iterator ip = id_loc.find(p->second);
641 if (ip == id_loc.end())
642 continue;
643 for (std::multimap<string,string>::const_iterator q = loc.find(p->second);
644 q != loc.end();
645 ++q) {
646 if (q->first != p->second)
647 break;
648 if (q->second == ip->second)
649 return p->first;
650 }
651 }
652 return -ERANGE;
653 }
654
655 int CrushWrapper::parse_loc_map(const std::vector<string>& args,
656 std::map<string,string> *ploc)
657 {
658 ploc->clear();
659 for (unsigned i = 0; i < args.size(); ++i) {
660 const char *s = args[i].c_str();
661 const char *pos = strchr(s, '=');
662 if (!pos)
663 return -EINVAL;
664 string key(s, 0, pos-s);
665 string value(pos+1);
666 if (value.length())
667 (*ploc)[key] = value;
668 else
669 return -EINVAL;
670 }
671 return 0;
672 }
673
674 int CrushWrapper::parse_loc_multimap(const std::vector<string>& args,
675 std::multimap<string,string> *ploc)
676 {
677 ploc->clear();
678 for (unsigned i = 0; i < args.size(); ++i) {
679 const char *s = args[i].c_str();
680 const char *pos = strchr(s, '=');
681 if (!pos)
682 return -EINVAL;
683 string key(s, 0, pos-s);
684 string value(pos+1);
685 if (value.length())
686 ploc->insert(make_pair(key, value));
687 else
688 return -EINVAL;
689 }
690 return 0;
691 }
692
693 bool CrushWrapper::check_item_loc(CephContext *cct, int item, const map<string,string>& loc,
694 int *weight)
695 {
696 ldout(cct, 5) << "check_item_loc item " << item << " loc " << loc << dendl;
697
698 for (map<int,string>::const_iterator p = type_map.begin(); p != type_map.end(); ++p) {
699 // ignore device
700 if (p->first == 0)
701 continue;
702
703 // ignore types that aren't specified in loc
704 map<string,string>::const_iterator q = loc.find(p->second);
705 if (q == loc.end()) {
706 ldout(cct, 2) << "warning: did not specify location for '" << p->second << "' level (levels are "
707 << type_map << ")" << dendl;
708 continue;
709 }
710
711 if (!name_exists(q->second)) {
712 ldout(cct, 5) << "check_item_loc bucket " << q->second << " dne" << dendl;
713 return false;
714 }
715
716 int id = get_item_id(q->second);
717 if (id >= 0) {
718 ldout(cct, 5) << "check_item_loc requested " << q->second << " for type " << p->second
719 << " is a device, not bucket" << dendl;
720 return false;
721 }
722
723 ceph_assert(bucket_exists(id));
724 crush_bucket *b = get_bucket(id);
725
726 // see if item exists in this bucket
727 for (unsigned j=0; j<b->size; j++) {
728 if (b->items[j] == item) {
729 ldout(cct, 2) << "check_item_loc " << item << " exists in bucket " << b->id << dendl;
730 if (weight)
731 *weight = crush_get_bucket_item_weight(b, j);
732 return true;
733 }
734 }
735 return false;
736 }
737
738 ldout(cct, 2) << __func__ << " item " << item << " loc " << loc << dendl;
739 return false;
740 }
741
742 map<string, string> CrushWrapper::get_full_location(int id) const
743 {
744 vector<pair<string, string> > full_location_ordered;
745 map<string,string> full_location;
746
747 get_full_location_ordered(id, full_location_ordered);
748
749 std::copy(full_location_ordered.begin(),
750 full_location_ordered.end(),
751 std::inserter(full_location, full_location.begin()));
752
753 return full_location;
754 }
755
756 int CrushWrapper::get_full_location(const string& name,
757 map<string,string> *ploc)
758 {
759 build_rmaps();
760 auto p = name_rmap.find(name);
761 if (p == name_rmap.end()) {
762 return -ENOENT;
763 }
764 *ploc = get_full_location(p->second);
765 return 0;
766 }
767
768 int CrushWrapper::get_full_location_ordered(int id, vector<pair<string, string> >& path) const
769 {
770 if (!item_exists(id))
771 return -ENOENT;
772 int cur = id;
773 int ret;
774 while (true) {
775 pair<string, string> parent_coord = get_immediate_parent(cur, &ret);
776 if (ret != 0)
777 break;
778 path.push_back(parent_coord);
779 cur = get_item_id(parent_coord.second);
780 }
781 return 0;
782 }
783
784 string CrushWrapper::get_full_location_ordered_string(int id) const
785 {
786 vector<pair<string, string> > full_location_ordered;
787 string full_location;
788 get_full_location_ordered(id, full_location_ordered);
789 reverse(begin(full_location_ordered), end(full_location_ordered));
790 for(auto i = full_location_ordered.begin(); i != full_location_ordered.end(); i++) {
791 full_location = full_location + i->first + "=" + i->second;
792 if (i != full_location_ordered.end() - 1) {
793 full_location = full_location + ",";
794 }
795 }
796 return full_location;
797 }
798
799 map<int, string> CrushWrapper::get_parent_hierarchy(int id) const
800 {
801 map<int,string> parent_hierarchy;
802 pair<string, string> parent_coord = get_immediate_parent(id);
803 int parent_id;
804
805 // get the integer type for id and create a counter from there
806 int type_counter = get_bucket_type(id);
807
808 // if we get a negative type then we can assume that we have an OSD
809 // change behavior in get_item_type FIXME
810 if (type_counter < 0)
811 type_counter = 0;
812
813 // read the type map and get the name of the type with the largest ID
814 int high_type = 0;
815 if (!type_map.empty())
816 high_type = type_map.rbegin()->first;
817
818 parent_id = get_item_id(parent_coord.second);
819
820 while (type_counter < high_type) {
821 type_counter++;
822 parent_hierarchy[ type_counter ] = parent_coord.first;
823
824 if (type_counter < high_type){
825 // get the coordinate information for the next parent
826 parent_coord = get_immediate_parent(parent_id);
827 parent_id = get_item_id(parent_coord.second);
828 }
829 }
830
831 return parent_hierarchy;
832 }
833
834 int CrushWrapper::get_children(int id, list<int> *children) const
835 {
836 // leaf?
837 if (id >= 0) {
838 return 0;
839 }
840
841 auto *b = get_bucket(id);
842 if (IS_ERR(b)) {
843 return -ENOENT;
844 }
845
846 for (unsigned n=0; n<b->size; n++) {
847 children->push_back(b->items[n]);
848 }
849 return b->size;
850 }
851
852 int CrushWrapper::get_all_children(int id, set<int> *children) const
853 {
854 // leaf?
855 if (id >= 0) {
856 return 0;
857 }
858
859 auto *b = get_bucket(id);
860 if (IS_ERR(b)) {
861 return -ENOENT;
862 }
863
864 int c = 0;
865 for (unsigned n = 0; n < b->size; n++) {
866 children->insert(b->items[n]);
867 c++;
868 auto r = get_all_children(b->items[n], children);
869 if (r < 0)
870 return r;
871 c += r;
872 }
873 return c;
874 }
875
876 void CrushWrapper::get_children_of_type(int id,
877 int type,
878 vector<int> *children,
879 bool exclude_shadow) const
880 {
881 if (id >= 0) {
882 if (type == 0) {
883 // want leaf?
884 children->push_back(id);
885 }
886 return;
887 }
888 auto b = get_bucket(id);
889 if (IS_ERR(b)) {
890 return;
891 }
892 if (b->type < type) {
893 // give up
894 return;
895 } else if (b->type == type) {
896 if (!is_shadow_item(b->id) || !exclude_shadow) {
897 children->push_back(b->id);
898 }
899 return;
900 }
901 for (unsigned n = 0; n < b->size; n++) {
902 get_children_of_type(b->items[n], type, children, exclude_shadow);
903 }
904 }
905
906 int CrushWrapper::verify_upmap(CephContext *cct,
907 int rule_id,
908 int pool_size,
909 const vector<int>& up)
910 {
911 auto rule = get_rule(rule_id);
912 if (IS_ERR(rule) || !rule) {
913 lderr(cct) << __func__ << " rule " << rule_id << " does not exist"
914 << dendl;
915 return -ENOENT;
916 }
917 for (unsigned step = 0; step < rule->len; ++step) {
918 auto curstep = &rule->steps[step];
919 ldout(cct, 10) << __func__ << " step " << step << dendl;
920 switch (curstep->op) {
921 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
922 case CRUSH_RULE_CHOOSELEAF_INDEP:
923 {
924 int type = curstep->arg2;
925 if (type == 0) // osd
926 break;
927 map<int, set<int>> osds_by_parent; // parent_of_desired_type -> osds
928 for (auto osd : up) {
929 auto parent = get_parent_of_type(osd, type, rule_id);
930 if (parent < 0) {
931 osds_by_parent[parent].insert(osd);
932 } else {
933 ldout(cct, 1) << __func__ << " unable to get parent of osd." << osd
934 << ", skipping for now"
935 << dendl;
936 }
937 }
938 for (auto i : osds_by_parent) {
939 if (i.second.size() > 1) {
940 lderr(cct) << __func__ << " multiple osds " << i.second
941 << " come from same failure domain " << i.first
942 << dendl;
943 return -EINVAL;
944 }
945 }
946 }
947 break;
948
949 case CRUSH_RULE_CHOOSE_FIRSTN:
950 case CRUSH_RULE_CHOOSE_INDEP:
951 {
952 int numrep = curstep->arg1;
953 int type = curstep->arg2;
954 if (type == 0) // osd
955 break;
956 if (numrep <= 0)
957 numrep += pool_size;
958 set<int> parents_of_type;
959 for (auto osd : up) {
960 auto parent = get_parent_of_type(osd, type, rule_id);
961 if (parent < 0) {
962 parents_of_type.insert(parent);
963 } else {
964 ldout(cct, 1) << __func__ << " unable to get parent of osd." << osd
965 << ", skipping for now"
966 << dendl;
967 }
968 }
969 if ((int)parents_of_type.size() > numrep) {
970 lderr(cct) << __func__ << " number of buckets "
971 << parents_of_type.size() << " exceeds desired " << numrep
972 << dendl;
973 return -EINVAL;
974 }
975 }
976 break;
977
978 default:
979 // ignore
980 break;
981 }
982 }
983 return 0;
984 }
985
986 int CrushWrapper::_get_leaves(int id, list<int> *leaves) const
987 {
988 ceph_assert(leaves);
989
990 // Already leaf?
991 if (id >= 0) {
992 leaves->push_back(id);
993 return 0;
994 }
995
996 auto b = get_bucket(id);
997 if (IS_ERR(b)) {
998 return -ENOENT;
999 }
1000
1001 for (unsigned n = 0; n < b->size; n++) {
1002 if (b->items[n] >= 0) {
1003 leaves->push_back(b->items[n]);
1004 } else {
1005 // is a bucket, do recursive call
1006 int r = _get_leaves(b->items[n], leaves);
1007 if (r < 0) {
1008 return r;
1009 }
1010 }
1011 }
1012
1013 return 0; // all is well
1014 }
1015
1016 int CrushWrapper::get_leaves(const string &name, set<int> *leaves) const
1017 {
1018 ceph_assert(leaves);
1019 leaves->clear();
1020
1021 if (!name_exists(name)) {
1022 return -ENOENT;
1023 }
1024
1025 int id = get_item_id(name);
1026 if (id >= 0) {
1027 // already leaf
1028 leaves->insert(id);
1029 return 0;
1030 }
1031
1032 list<int> unordered;
1033 int r = _get_leaves(id, &unordered);
1034 if (r < 0) {
1035 return r;
1036 }
1037
1038 for (auto &p : unordered) {
1039 leaves->insert(p);
1040 }
1041
1042 return 0;
1043 }
1044
1045 int CrushWrapper::insert_item(
1046 CephContext *cct, int item, float weight, string name,
1047 const map<string,string>& loc, // typename -> bucketname
1048 bool init_weight_sets)
1049 {
1050 ldout(cct, 5) << "insert_item item " << item << " weight " << weight
1051 << " name " << name << " loc " << loc << dendl;
1052
1053 if (!is_valid_crush_name(name))
1054 return -EINVAL;
1055
1056 if (!is_valid_crush_loc(cct, loc))
1057 return -EINVAL;
1058
1059 int r = validate_weightf(weight);
1060 if (r < 0) {
1061 return r;
1062 }
1063
1064 if (name_exists(name)) {
1065 if (get_item_id(name) != item) {
1066 ldout(cct, 10) << "device name '" << name << "' already exists as id "
1067 << get_item_id(name) << dendl;
1068 return -EEXIST;
1069 }
1070 } else {
1071 set_item_name(item, name);
1072 }
1073
1074 int cur = item;
1075
1076 // create locations if locations don't exist and add child in
1077 // location with 0 weight the more detail in the insert_item method
1078 // declaration in CrushWrapper.h
1079 for (auto p = type_map.begin(); p != type_map.end(); ++p) {
1080 // ignore device type
1081 if (p->first == 0)
1082 continue;
1083
1084 // skip types that are unspecified
1085 map<string,string>::const_iterator q = loc.find(p->second);
1086 if (q == loc.end()) {
1087 ldout(cct, 2) << "warning: did not specify location for '"
1088 << p->second << "' level (levels are "
1089 << type_map << ")" << dendl;
1090 continue;
1091 }
1092
1093 if (!name_exists(q->second)) {
1094 ldout(cct, 5) << "insert_item creating bucket " << q->second << dendl;
1095 int empty = 0, newid;
1096 int r = add_bucket(0, 0,
1097 CRUSH_HASH_DEFAULT, p->first, 1, &cur, &empty, &newid);
1098 if (r < 0) {
1099 ldout(cct, 1) << "add_bucket failure error: " << cpp_strerror(r)
1100 << dendl;
1101 return r;
1102 }
1103 set_item_name(newid, q->second);
1104
1105 cur = newid;
1106 continue;
1107 }
1108
1109 // add to an existing bucket
1110 int id = get_item_id(q->second);
1111 if (!bucket_exists(id)) {
1112 ldout(cct, 1) << "insert_item doesn't have bucket " << id << dendl;
1113 return -EINVAL;
1114 }
1115
1116 // check that we aren't creating a cycle.
1117 if (subtree_contains(id, cur)) {
1118 ldout(cct, 1) << "insert_item item " << cur << " already exists beneath "
1119 << id << dendl;
1120 return -EINVAL;
1121 }
1122
1123 // we have done sanity check above
1124 crush_bucket *b = get_bucket(id);
1125
1126 if (p->first != b->type) {
1127 ldout(cct, 1) << "insert_item existing bucket has type "
1128 << "'" << type_map[b->type] << "' != "
1129 << "'" << type_map[p->first] << "'" << dendl;
1130 return -EINVAL;
1131 }
1132
1133 // are we forming a loop?
1134 if (subtree_contains(cur, b->id)) {
1135 ldout(cct, 1) << "insert_item " << cur << " already contains " << b->id
1136 << "; cannot form loop" << dendl;
1137 return -ELOOP;
1138 }
1139
1140 ldout(cct, 5) << "insert_item adding " << cur << " weight " << weight
1141 << " to bucket " << id << dendl;
1142 [[maybe_unused]] int r = bucket_add_item(b, cur, 0);
1143 ceph_assert(!r);
1144 break;
1145 }
1146
1147 // adjust the item's weight in location
1148 if (adjust_item_weightf_in_loc(cct, item, weight, loc,
1149 item >= 0 && init_weight_sets) > 0) {
1150 if (item >= crush->max_devices) {
1151 crush->max_devices = item + 1;
1152 ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices
1153 << dendl;
1154 }
1155 r = rebuild_roots_with_classes(cct);
1156 if (r < 0) {
1157 ldout(cct, 0) << __func__ << " unable to rebuild roots with classes: "
1158 << cpp_strerror(r) << dendl;
1159 return r;
1160 }
1161 return 0;
1162 }
1163
1164 ldout(cct, 1) << "error: didn't find anywhere to add item " << item
1165 << " in " << loc << dendl;
1166 return -EINVAL;
1167 }
1168
1169
1170 int CrushWrapper::move_bucket(
1171 CephContext *cct, int id, const map<string,string>& loc)
1172 {
1173 // sorry this only works for buckets
1174 if (id >= 0)
1175 return -EINVAL;
1176
1177 if (!item_exists(id))
1178 return -ENOENT;
1179
1180 // get the name of the bucket we are trying to move for later
1181 string id_name = get_item_name(id);
1182
1183 // detach the bucket
1184 int bucket_weight = detach_bucket(cct, id);
1185
1186 // insert the bucket back into the hierarchy
1187 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc,
1188 false);
1189 }
1190
1191 int CrushWrapper::detach_bucket(CephContext *cct, int item)
1192 {
1193 if (!crush)
1194 return (-EINVAL);
1195
1196 if (item >= 0)
1197 return (-EINVAL);
1198
1199 // check that the bucket that we want to detach exists
1200 ceph_assert(bucket_exists(item));
1201
1202 // get the bucket's weight
1203 crush_bucket *b = get_bucket(item);
1204 unsigned bucket_weight = b->weight;
1205
1206 // get where the bucket is located
1207 pair<string, string> bucket_location = get_immediate_parent(item);
1208
1209 // get the id of the parent bucket
1210 int parent_id = get_item_id(bucket_location.second);
1211
1212 // get the parent bucket
1213 crush_bucket *parent_bucket = get_bucket(parent_id);
1214
1215 if (!IS_ERR(parent_bucket)) {
1216 // zero out the bucket weight
1217 adjust_item_weight_in_bucket(cct, item, 0, parent_bucket->id, true);
1218
1219 // remove the bucket from the parent
1220 bucket_remove_item(parent_bucket, item);
1221 } else if (PTR_ERR(parent_bucket) != -ENOENT) {
1222 return PTR_ERR(parent_bucket);
1223 }
1224
1225 // check that we're happy
1226 int test_weight = 0;
1227 map<string,string> test_location;
1228 test_location[ bucket_location.first ] = (bucket_location.second);
1229
1230 bool successful_detach = !(check_item_loc(cct, item, test_location,
1231 &test_weight));
1232 ceph_assert(successful_detach);
1233 ceph_assert(test_weight == 0);
1234
1235 return bucket_weight;
1236 }
1237
1238 bool CrushWrapper::is_parent_of(int child, int p) const
1239 {
1240 int parent = 0;
1241 while (!get_immediate_parent_id(child, &parent)) {
1242 if (parent == p) {
1243 return true;
1244 }
1245 child = parent;
1246 }
1247 return false;
1248 }
1249
1250 int CrushWrapper::swap_bucket(CephContext *cct, int src, int dst)
1251 {
1252 if (src >= 0 || dst >= 0)
1253 return -EINVAL;
1254 if (!item_exists(src) || !item_exists(dst))
1255 return -EINVAL;
1256 crush_bucket *a = get_bucket(src);
1257 crush_bucket *b = get_bucket(dst);
1258 if (is_parent_of(a->id, b->id) || is_parent_of(b->id, a->id)) {
1259 return -EINVAL;
1260 }
1261 unsigned aw = a->weight;
1262 unsigned bw = b->weight;
1263
1264 // swap weights
1265 adjust_item_weight(cct, a->id, bw);
1266 adjust_item_weight(cct, b->id, aw);
1267
1268 // swap items
1269 map<int,unsigned> tmp;
1270 unsigned as = a->size;
1271 unsigned bs = b->size;
1272 for (unsigned i = 0; i < as; ++i) {
1273 int item = a->items[0];
1274 int itemw = crush_get_bucket_item_weight(a, 0);
1275 tmp[item] = itemw;
1276 bucket_remove_item(a, item);
1277 }
1278 ceph_assert(a->size == 0);
1279 ceph_assert(b->size == bs);
1280 for (unsigned i = 0; i < bs; ++i) {
1281 int item = b->items[0];
1282 int itemw = crush_get_bucket_item_weight(b, 0);
1283 bucket_remove_item(b, item);
1284 bucket_add_item(a, item, itemw);
1285 }
1286 ceph_assert(a->size == bs);
1287 ceph_assert(b->size == 0);
1288 for (auto t : tmp) {
1289 bucket_add_item(b, t.first, t.second);
1290 }
1291 ceph_assert(a->size == bs);
1292 ceph_assert(b->size == as);
1293
1294 // swap names
1295 swap_names(src, dst);
1296 return rebuild_roots_with_classes(cct);
1297 }
1298
1299 int CrushWrapper::link_bucket(
1300 CephContext *cct, int id, const map<string,string>& loc)
1301 {
1302 // sorry this only works for buckets
1303 if (id >= 0)
1304 return -EINVAL;
1305
1306 if (!item_exists(id))
1307 return -ENOENT;
1308
1309 // get the name of the bucket we are trying to move for later
1310 string id_name = get_item_name(id);
1311
1312 crush_bucket *b = get_bucket(id);
1313 unsigned bucket_weight = b->weight;
1314
1315 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
1316 }
1317
1318 int CrushWrapper::create_or_move_item(
1319 CephContext *cct, int item, float weight, string name,
1320 const map<string,string>& loc, // typename -> bucketname
1321 bool init_weight_sets)
1322 {
1323 int ret = 0;
1324 int old_iweight;
1325
1326 if (!is_valid_crush_name(name))
1327 return -EINVAL;
1328
1329 if (check_item_loc(cct, item, loc, &old_iweight)) {
1330 ldout(cct, 5) << "create_or_move_item " << item << " already at " << loc
1331 << dendl;
1332 } else {
1333 if (_search_item_exists(item)) {
1334 weight = get_item_weightf(item);
1335 ldout(cct, 10) << "create_or_move_item " << item
1336 << " exists with weight " << weight << dendl;
1337 remove_item(cct, item, true);
1338 }
1339 ldout(cct, 5) << "create_or_move_item adding " << item
1340 << " weight " << weight
1341 << " at " << loc << dendl;
1342 ret = insert_item(cct, item, weight, name, loc,
1343 item >= 0 && init_weight_sets);
1344 if (ret == 0)
1345 ret = 1; // changed
1346 }
1347 return ret;
1348 }
1349
1350 int CrushWrapper::update_item(
1351 CephContext *cct, int item, float weight, string name,
1352 const map<string,string>& loc) // typename -> bucketname
1353 {
1354 ldout(cct, 5) << "update_item item " << item << " weight " << weight
1355 << " name " << name << " loc " << loc << dendl;
1356 int ret = 0;
1357
1358 if (!is_valid_crush_name(name))
1359 return -EINVAL;
1360
1361 if (!is_valid_crush_loc(cct, loc))
1362 return -EINVAL;
1363
1364 ret = validate_weightf(weight);
1365 if (ret < 0) {
1366 return ret;
1367 }
1368
1369 // compare quantized (fixed-point integer) weights!
1370 int iweight = (int)(weight * (float)0x10000);
1371 int old_iweight;
1372 if (check_item_loc(cct, item, loc, &old_iweight)) {
1373 ldout(cct, 5) << "update_item " << item << " already at " << loc << dendl;
1374 if (old_iweight != iweight) {
1375 ldout(cct, 5) << "update_item " << item << " adjusting weight "
1376 << ((float)old_iweight/(float)0x10000) << " -> " << weight
1377 << dendl;
1378 adjust_item_weight_in_loc(cct, item, iweight, loc);
1379 ret = 1;
1380 }
1381 if (get_item_name(item) != name) {
1382 ldout(cct, 5) << "update_item setting " << item << " name to " << name
1383 << dendl;
1384 set_item_name(item, name);
1385 ret = 1;
1386 }
1387 } else {
1388 if (item_exists(item)) {
1389 remove_item(cct, item, true);
1390 }
1391 ldout(cct, 5) << "update_item adding " << item << " weight " << weight
1392 << " at " << loc << dendl;
1393 ret = insert_item(cct, item, weight, name, loc);
1394 if (ret == 0)
1395 ret = 1; // changed
1396 }
1397 return ret;
1398 }
1399
1400 int CrushWrapper::get_item_weight(int id) const
1401 {
1402 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1403 crush_bucket *b = crush->buckets[bidx];
1404 if (b == NULL)
1405 continue;
1406 if (b->id == id)
1407 return b->weight;
1408 for (unsigned i = 0; i < b->size; i++)
1409 if (b->items[i] == id)
1410 return crush_get_bucket_item_weight(b, i);
1411 }
1412 return -ENOENT;
1413 }
1414
1415 int CrushWrapper::get_item_weight_in_loc(int id, const map<string,string> &loc)
1416 {
1417 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
1418
1419 int bid = get_item_id(l->second);
1420 if (!bucket_exists(bid))
1421 continue;
1422 crush_bucket *b = get_bucket(bid);
1423 for (unsigned int i = 0; i < b->size; i++) {
1424 if (b->items[i] == id) {
1425 return crush_get_bucket_item_weight(b, i);
1426 }
1427 }
1428 }
1429 return -ENOENT;
1430 }
1431
1432 int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight,
1433 bool update_weight_sets)
1434 {
1435 ldout(cct, 5) << __func__ << " " << id << " weight " << weight
1436 << " update_weight_sets=" << (int)update_weight_sets
1437 << dendl;
1438 int changed = 0;
1439 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1440 if (!crush->buckets[bidx]) {
1441 continue;
1442 }
1443 int r = adjust_item_weight_in_bucket(cct, id, weight, -1-bidx,
1444 update_weight_sets);
1445 if (r > 0) {
1446 ++changed;
1447 }
1448 }
1449 if (!changed) {
1450 return -ENOENT;
1451 }
1452 return changed;
1453 }
1454
1455 int CrushWrapper::adjust_item_weight_in_bucket(
1456 CephContext *cct, int id, int weight,
1457 int bucket_id,
1458 bool update_weight_sets)
1459 {
1460 ldout(cct, 5) << __func__ << " " << id << " weight " << weight
1461 << " in bucket " << bucket_id
1462 << " update_weight_sets=" << (int)update_weight_sets
1463 << dendl;
1464 int changed = 0;
1465 if (!bucket_exists(bucket_id)) {
1466 return -ENOENT;
1467 }
1468 crush_bucket *b = get_bucket(bucket_id);
1469 for (unsigned int i = 0; i < b->size; i++) {
1470 if (b->items[i] == id) {
1471 int diff = bucket_adjust_item_weight(cct, b, id, weight,
1472 update_weight_sets);
1473 ldout(cct, 5) << __func__ << " " << id << " diff " << diff
1474 << " in bucket " << bucket_id << dendl;
1475 adjust_item_weight(cct, bucket_id, b->weight, false);
1476 changed++;
1477 }
1478 }
1479 // update weight-sets so they continue to sum
1480 for (auto& p : choose_args) {
1481 auto &cmap = p.second;
1482 if (!cmap.args) {
1483 continue;
1484 }
1485 crush_choose_arg *arg = &cmap.args[-1 - bucket_id];
1486 if (!arg->weight_set) {
1487 continue;
1488 }
1489 ceph_assert(arg->weight_set_positions > 0);
1490 vector<int> w(arg->weight_set_positions);
1491 for (unsigned i = 0; i < b->size; ++i) {
1492 for (unsigned j = 0; j < arg->weight_set_positions; ++j) {
1493 crush_weight_set *weight_set = &arg->weight_set[j];
1494 w[j] += weight_set->weights[i];
1495 }
1496 }
1497 ldout(cct,5) << __func__ << " adjusting bucket " << bucket_id
1498 << " cmap " << p.first << " weights to " << w << dendl;
1499 ostringstream ss;
1500 choose_args_adjust_item_weight(cct, cmap, bucket_id, w, &ss);
1501 }
1502 if (!changed) {
1503 return -ENOENT;
1504 }
1505 return changed;
1506 }
1507
1508 int CrushWrapper::adjust_item_weight_in_loc(
1509 CephContext *cct, int id, int weight,
1510 const map<string,string>& loc,
1511 bool update_weight_sets)
1512 {
1513 ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight
1514 << " in " << loc
1515 << " update_weight_sets=" << (int)update_weight_sets
1516 << dendl;
1517 int changed = 0;
1518 for (auto l = loc.begin(); l != loc.end(); ++l) {
1519 int bid = get_item_id(l->second);
1520 if (!bucket_exists(bid))
1521 continue;
1522 int r = adjust_item_weight_in_bucket(cct, id, weight, bid,
1523 update_weight_sets);
1524 if (r > 0) {
1525 ++changed;
1526 }
1527 }
1528 if (!changed) {
1529 return -ENOENT;
1530 }
1531 return changed;
1532 }
1533
1534 int CrushWrapper::adjust_subtree_weight(CephContext *cct, int id, int weight,
1535 bool update_weight_sets)
1536 {
1537 ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl;
1538 crush_bucket *b = get_bucket(id);
1539 if (IS_ERR(b))
1540 return PTR_ERR(b);
1541 int changed = 0;
1542 list<crush_bucket*> q;
1543 q.push_back(b);
1544 while (!q.empty()) {
1545 b = q.front();
1546 q.pop_front();
1547 int local_changed = 0;
1548 for (unsigned i=0; i<b->size; ++i) {
1549 int n = b->items[i];
1550 if (n >= 0) {
1551 adjust_item_weight_in_bucket(cct, n, weight, b->id, update_weight_sets);
1552 ++changed;
1553 ++local_changed;
1554 } else {
1555 crush_bucket *sub = get_bucket(n);
1556 if (IS_ERR(sub))
1557 continue;
1558 q.push_back(sub);
1559 }
1560 }
1561 }
1562 return changed;
1563 }
1564
1565 bool CrushWrapper::check_item_present(int id) const
1566 {
1567 bool found = false;
1568
1569 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1570 crush_bucket *b = crush->buckets[bidx];
1571 if (b == 0)
1572 continue;
1573 for (unsigned i = 0; i < b->size; i++)
1574 if (b->items[i] == id)
1575 found = true;
1576 }
1577 return found;
1578 }
1579
1580
1581 pair<string,string> CrushWrapper::get_immediate_parent(int id, int *_ret) const
1582 {
1583
1584 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1585 crush_bucket *b = crush->buckets[bidx];
1586 if (b == 0)
1587 continue;
1588 if (is_shadow_item(b->id))
1589 continue;
1590 for (unsigned i = 0; i < b->size; i++)
1591 if (b->items[i] == id) {
1592 string parent_id = name_map.at(b->id);
1593 string parent_bucket_type = type_map.at(b->type);
1594 if (_ret)
1595 *_ret = 0;
1596 return make_pair(parent_bucket_type, parent_id);
1597 }
1598 }
1599
1600 if (_ret)
1601 *_ret = -ENOENT;
1602
1603 return pair<string, string>();
1604 }
1605
1606 int CrushWrapper::get_immediate_parent_id(int id, int *parent) const
1607 {
1608 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1609 crush_bucket *b = crush->buckets[bidx];
1610 if (b == 0)
1611 continue;
1612 if (is_shadow_item(b->id))
1613 continue;
1614 for (unsigned i = 0; i < b->size; i++) {
1615 if (b->items[i] == id) {
1616 *parent = b->id;
1617 return 0;
1618 }
1619 }
1620 }
1621 return -ENOENT;
1622 }
1623
1624 int CrushWrapper::get_parent_of_type(int item, int type, int rule) const
1625 {
1626 if (rule < 0) {
1627 // no rule specified
1628 do {
1629 int r = get_immediate_parent_id(item, &item);
1630 if (r < 0) {
1631 return 0;
1632 }
1633 } while (get_bucket_type(item) != type);
1634 return item;
1635 }
1636 set<int> roots;
1637 find_takes_by_rule(rule, &roots);
1638 for (auto root : roots) {
1639 vector<int> candidates;
1640 get_children_of_type(root, type, &candidates, false);
1641 for (auto candidate : candidates) {
1642 if (subtree_contains(candidate, item)) {
1643 // note that here we assure that no two different buckets
1644 // from a single crush rule will share a same device,
1645 // which should generally be true.
1646 return candidate;
1647 }
1648 }
1649 }
1650 return 0; // not found
1651 }
1652
1653 void CrushWrapper::get_subtree_of_type(int type, vector<int> *subtrees)
1654 {
1655 set<int> roots;
1656 find_roots(&roots);
1657 for (auto r: roots) {
1658 crush_bucket *b = get_bucket(r);
1659 if (IS_ERR(b))
1660 continue;
1661 get_children_of_type(b->id, type, subtrees);
1662 }
1663 }
1664
1665 bool CrushWrapper::class_is_in_use(int class_id, ostream *ss)
1666 {
1667 list<unsigned> rules;
1668 for (unsigned i = 0; i < crush->max_rules; ++i) {
1669 crush_rule *r = crush->rules[i];
1670 if (!r)
1671 continue;
1672 for (unsigned j = 0; j < r->len; ++j) {
1673 if (r->steps[j].op == CRUSH_RULE_TAKE) {
1674 int root = r->steps[j].arg1;
1675 for (auto &p : class_bucket) {
1676 auto& q = p.second;
1677 if (q.count(class_id) && q[class_id] == root) {
1678 rules.push_back(i);
1679 }
1680 }
1681 }
1682 }
1683 }
1684 if (rules.empty()) {
1685 return false;
1686 }
1687 if (ss) {
1688 ostringstream os;
1689 for (auto &p: rules) {
1690 os << "'" << get_rule_name(p) <<"',";
1691 }
1692 string out(os.str());
1693 out.resize(out.size() - 1); // drop last ','
1694 *ss << "still referenced by crush_rule(s): " << out;
1695 }
1696 return true;
1697 }
1698
1699 int CrushWrapper::rename_class(const string& srcname, const string& dstname)
1700 {
1701 auto i = class_rname.find(srcname);
1702 if (i == class_rname.end())
1703 return -ENOENT;
1704 auto j = class_rname.find(dstname);
1705 if (j != class_rname.end())
1706 return -EEXIST;
1707
1708 int class_id = i->second;
1709 ceph_assert(class_name.count(class_id));
1710 // rename any shadow buckets of old class name
1711 for (auto &it: class_map) {
1712 if (it.first < 0 && it.second == class_id) {
1713 string old_name = get_item_name(it.first);
1714 size_t pos = old_name.find("~");
1715 ceph_assert(pos != string::npos);
1716 string name_no_class = old_name.substr(0, pos);
1717 string old_class_name = old_name.substr(pos + 1);
1718 ceph_assert(old_class_name == srcname);
1719 string new_name = name_no_class + "~" + dstname;
1720 // we do not use set_item_name
1721 // because the name is intentionally invalid
1722 name_map[it.first] = new_name;
1723 have_rmaps = false;
1724 }
1725 }
1726
1727 // rename class
1728 class_rname.erase(srcname);
1729 class_name.erase(class_id);
1730 class_rname[dstname] = class_id;
1731 class_name[class_id] = dstname;
1732 return 0;
1733 }
1734
1735 int CrushWrapper::populate_classes(
1736 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket)
1737 {
1738 // build set of previous used shadow ids
1739 set<int32_t> used_ids;
1740 for (auto& p : old_class_bucket) {
1741 for (auto& q : p.second) {
1742 used_ids.insert(q.second);
1743 }
1744 }
1745 // accumulate weight values for each carg and bucket as we go. because it is
1746 // depth first, we will have the nested bucket weights we need when we
1747 // finish constructing the containing buckets.
1748 map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> [bucket weight for each position]
1749 set<int> roots;
1750 find_nonshadow_roots(&roots);
1751 for (auto &r : roots) {
1752 if (r >= 0)
1753 continue;
1754 for (auto &c : class_name) {
1755 int clone;
1756 int res = device_class_clone(r, c.first, old_class_bucket, used_ids,
1757 &clone, &cmap_item_weight);
1758 if (res < 0)
1759 return res;
1760 }
1761 }
1762 return 0;
1763 }
1764
1765 int CrushWrapper::trim_roots_with_class(CephContext *cct)
1766 {
1767 set<int> roots;
1768 find_shadow_roots(&roots);
1769 for (auto &r : roots) {
1770 if (r >= 0)
1771 continue;
1772 int res = remove_root(cct, r);
1773 if (res)
1774 return res;
1775 }
1776 // there is no need to reweight because we only remove from the
1777 // root and down
1778 return 0;
1779 }
1780
1781 int32_t CrushWrapper::_alloc_class_id() const {
1782 if (class_name.empty()) {
1783 return 0;
1784 }
1785 int32_t class_id = class_name.rbegin()->first + 1;
1786 if (class_id >= 0) {
1787 return class_id;
1788 }
1789 // wrapped, pick a random start and do exhaustive search
1790 uint32_t upperlimit = numeric_limits<int32_t>::max();
1791 upperlimit++;
1792 class_id = rand() % upperlimit;
1793 const auto start = class_id;
1794 do {
1795 if (!class_name.count(class_id)) {
1796 return class_id;
1797 } else {
1798 class_id++;
1799 if (class_id < 0) {
1800 class_id = 0;
1801 }
1802 }
1803 } while (class_id != start);
1804 ceph_abort_msg("no available class id");
1805 }
1806
1807 int CrushWrapper::set_subtree_class(
1808 const string& subtree,
1809 const string& new_class)
1810 {
1811 if (!name_exists(subtree)) {
1812 return -ENOENT;
1813 }
1814
1815 int new_class_id = get_or_create_class_id(new_class);
1816 int id = get_item_id(subtree);
1817 list<int> q = { id };
1818 while (!q.empty()) {
1819 int id = q.front();
1820 q.pop_front();
1821 crush_bucket *b = get_bucket(id);
1822 if (IS_ERR(b)) {
1823 return PTR_ERR(b);
1824 }
1825 for (unsigned i = 0; i < b->size; ++i) {
1826 int item = b->items[i];
1827 if (item >= 0) {
1828 class_map[item] = new_class_id;
1829 } else {
1830 q.push_back(item);
1831 }
1832 }
1833 }
1834 return 0;
1835 }
1836
1837 int CrushWrapper::reclassify(
1838 CephContext *cct,
1839 ostream& out,
1840 const map<string,string>& classify_root,
1841 const map<string,pair<string,string>>& classify_bucket
1842 )
1843 {
1844 map<int,string> reclassified_bucket; // orig_id -> class
1845
1846 // classify_root
1847 for (auto& i : classify_root) {
1848 string root = i.first;
1849 if (!name_exists(root)) {
1850 out << "root " << root << " does not exist" << std::endl;
1851 return -EINVAL;
1852 }
1853 int root_id = get_item_id(root);
1854 string new_class = i.second;
1855 int new_class_id = get_or_create_class_id(new_class);
1856 out << "classify_root " << root << " (" << root_id
1857 << ") as " << new_class << std::endl;
1858
1859 // validate rules
1860 for (unsigned j = 0; j < crush->max_rules; j++) {
1861 if (crush->rules[j]) {
1862 auto rule = crush->rules[j];
1863 for (unsigned k = 0; k < rule->len; ++k) {
1864 if (rule->steps[k].op == CRUSH_RULE_TAKE) {
1865 int step_item = get_rule_arg1(j, k);
1866 int original_item;
1867 int c;
1868 int res = split_id_class(step_item, &original_item, &c);
1869 if (res < 0)
1870 return res;
1871 if (c >= 0) {
1872 if (original_item == root_id) {
1873 out << " rule " << j << " includes take on root "
1874 << root << " class " << c << std::endl;
1875 return -EINVAL;
1876 }
1877 }
1878 }
1879 }
1880 }
1881 }
1882
1883 // rebuild new buckets for root
1884 //cout << "before class_bucket: " << class_bucket << std::endl;
1885 map<int,int> renumber;
1886 list<int> q;
1887 q.push_back(root_id);
1888 while (!q.empty()) {
1889 int id = q.front();
1890 q.pop_front();
1891 crush_bucket *bucket = get_bucket(id);
1892 if (IS_ERR(bucket)) {
1893 out << "cannot find bucket " << id
1894 << ": " << cpp_strerror(PTR_ERR(bucket)) << std::endl;
1895 return PTR_ERR(bucket);
1896 }
1897
1898 // move bucket
1899 int new_id = get_new_bucket_id();
1900 out << " renumbering bucket " << id << " -> " << new_id << std::endl;
1901 renumber[id] = new_id;
1902 crush->buckets[-1-new_id] = bucket;
1903 bucket->id = new_id;
1904 crush->buckets[-1-id] = crush_make_bucket(crush,
1905 bucket->alg,
1906 bucket->hash,
1907 bucket->type,
1908 0, NULL, NULL);
1909 crush->buckets[-1-id]->id = id;
1910 for (auto& i : choose_args) {
1911 i.second.args[-1-new_id] = i.second.args[-1-id];
1912 memset(&i.second.args[-1-id], 0, sizeof(i.second.args[0]));
1913 }
1914 class_bucket.erase(id);
1915 class_bucket[new_id][new_class_id] = id;
1916 name_map[new_id] = string(get_item_name(id));
1917 name_map[id] = string(get_item_name(id)) + "~" + new_class;
1918
1919 for (unsigned j = 0; j < bucket->size; ++j) {
1920 if (bucket->items[j] < 0) {
1921 q.push_front(bucket->items[j]);
1922 } else {
1923 // we don't reclassify the device here; if the users wants that,
1924 // they can pass --set-subtree-class separately.
1925 }
1926 }
1927 }
1928 //cout << "mid class_bucket: " << class_bucket << std::endl;
1929
1930 for (int i = 0; i < crush->max_buckets; ++i) {
1931 crush_bucket *b = crush->buckets[i];
1932 if (!b) {
1933 continue;
1934 }
1935 for (unsigned j = 0; j < b->size; ++j) {
1936 if (renumber.count(b->items[j])) {
1937 b->items[j] = renumber[b->items[j]];
1938 }
1939 }
1940 }
1941
1942 int r = rebuild_roots_with_classes(cct);
1943 if (r < 0) {
1944 out << "failed to rebuild_roots_with_classes: " << cpp_strerror(r)
1945 << std::endl;
1946 return r;
1947 }
1948 //cout << "final class_bucket: " << class_bucket << std::endl;
1949 }
1950
1951 // classify_bucket
1952 map<int,int> send_to; // source bucket -> dest bucket
1953 map<int,map<int,int>> new_class_bucket;
1954 map<int,string> new_bucket_names;
1955 map<int,map<string,string>> new_buckets;
1956 map<string,int> new_bucket_by_name;
1957 for (auto& i : classify_bucket) {
1958 const string& match = i.first; // prefix% or %suffix
1959 const string& new_class = i.second.first;
1960 const string& default_parent = i.second.second;
1961 if (!name_exists(default_parent)) {
1962 out << "default parent " << default_parent << " does not exist"
1963 << std::endl;
1964 return -EINVAL;
1965 }
1966 int default_parent_id = get_item_id(default_parent);
1967 crush_bucket *default_parent_bucket = get_bucket(default_parent_id);
1968 assert(default_parent_bucket);
1969 string default_parent_type_name = get_type_name(default_parent_bucket->type);
1970
1971 out << "classify_bucket " << match << " as " << new_class
1972 << " default bucket " << default_parent
1973 << " (" << default_parent_type_name << ")" << std::endl;
1974
1975 int new_class_id = get_or_create_class_id(new_class);
1976 for (int j = 0; j < crush->max_buckets; ++j) {
1977 crush_bucket *b = crush->buckets[j];
1978 if (!b || is_shadow_item(b->id)) {
1979 continue;
1980 }
1981 string name = get_item_name(b->id);
1982 if (name.length() < match.length()) {
1983 continue;
1984 }
1985 string basename;
1986 if (match[0] == '%') {
1987 if (match.substr(1) != name.substr(name.size() - match.size() + 1)) {
1988 continue;
1989 }
1990 basename = name.substr(0, name.size() - match.size() + 1);
1991 } else if (match[match.size() - 1] == '%') {
1992 if (match.substr(0, match.size() - 1) !=
1993 name.substr(0, match.size() - 1)) {
1994 continue;
1995 }
1996 basename = name.substr(match.size() - 1);
1997 } else if (match == name) {
1998 basename = default_parent;
1999 } else {
2000 continue;
2001 }
2002 cout << "match " << match << " to " << name << " basename " << basename
2003 << std::endl;
2004 // look up or create basename bucket
2005 int base_id;
2006 if (name_exists(basename)) {
2007 base_id = get_item_id(basename);
2008 cout << " have base " << base_id << std::endl;
2009 } else if (new_bucket_by_name.count(basename)) {
2010 base_id = new_bucket_by_name[basename];
2011 cout << " already creating base " << base_id << std::endl;
2012 } else {
2013 base_id = get_new_bucket_id();
2014 crush->buckets[-1-base_id] = crush_make_bucket(crush,
2015 b->alg,
2016 b->hash,
2017 b->type,
2018 0, NULL, NULL);
2019 crush->buckets[-1-base_id]->id = base_id;
2020 name_map[base_id] = basename;
2021 new_bucket_by_name[basename] = base_id;
2022 cout << " created base " << base_id << std::endl;
2023
2024 new_buckets[base_id][default_parent_type_name] = default_parent;
2025 }
2026 send_to[b->id] = base_id;
2027 new_class_bucket[base_id][new_class_id] = b->id;
2028 new_bucket_names[b->id] = basename + "~" + get_class_name(new_class_id);
2029
2030 // make sure devices are classified
2031 for (unsigned i = 0; i < b->size; ++i) {
2032 int item = b->items[i];
2033 if (item >= 0) {
2034 class_map[item] = new_class_id;
2035 }
2036 }
2037 }
2038 }
2039
2040 // no name_exists() works below,
2041 have_rmaps = false;
2042
2043 // copy items around
2044 //cout << "send_to " << send_to << std::endl;
2045 set<int> roots;
2046 find_roots(&roots);
2047 for (auto& i : send_to) {
2048 crush_bucket *from = get_bucket(i.first);
2049 crush_bucket *to = get_bucket(i.second);
2050 cout << "moving items from " << from->id << " (" << get_item_name(from->id)
2051 << ") to " << to->id << " (" << get_item_name(to->id) << ")"
2052 << std::endl;
2053 for (unsigned j = 0; j < from->size; ++j) {
2054 int item = from->items[j];
2055 int r;
2056 map<string,string> to_loc;
2057 to_loc[get_type_name(to->type)] = get_item_name(to->id);
2058 if (item >= 0) {
2059 if (subtree_contains(to->id, item)) {
2060 continue;
2061 }
2062 map<string,string> from_loc;
2063 from_loc[get_type_name(from->type)] = get_item_name(from->id);
2064 auto w = get_item_weightf_in_loc(item, from_loc);
2065 r = insert_item(cct, item,
2066 w,
2067 get_item_name(item),
2068 to_loc);
2069 } else {
2070 if (!send_to.count(item)) {
2071 lderr(cct) << "item " << item << " in bucket " << from->id
2072 << " is not also a reclassified bucket" << dendl;
2073 return -EINVAL;
2074 }
2075 int newitem = send_to[item];
2076 if (subtree_contains(to->id, newitem)) {
2077 continue;
2078 }
2079 r = link_bucket(cct, newitem, to_loc);
2080 }
2081 if (r != 0) {
2082 cout << __func__ << " err from insert_item: " << cpp_strerror(r)
2083 << std::endl;
2084 return r;
2085 }
2086 }
2087 }
2088
2089 // make sure new buckets have parents
2090 for (auto& i : new_buckets) {
2091 int parent;
2092 if (get_immediate_parent_id(i.first, &parent) < 0) {
2093 cout << "new bucket " << i.first << " missing parent, adding at "
2094 << i.second << std::endl;
2095 int r = link_bucket(cct, i.first, i.second);
2096 if (r != 0) {
2097 cout << __func__ << " err from insert_item: " << cpp_strerror(r)
2098 << std::endl;
2099 return r;
2100 }
2101 }
2102 }
2103
2104 // set class mappings
2105 //cout << "pre class_bucket: " << class_bucket << std::endl;
2106 for (auto& i : new_class_bucket) {
2107 for (auto& j : i.second) {
2108 class_bucket[i.first][j.first] = j.second;
2109 }
2110
2111 }
2112 //cout << "post class_bucket: " << class_bucket << std::endl;
2113 for (auto& i : new_bucket_names) {
2114 name_map[i.first] = i.second;
2115 }
2116
2117 int r = rebuild_roots_with_classes(cct);
2118 if (r < 0) {
2119 out << "failed to rebuild_roots_with_classes: " << cpp_strerror(r)
2120 << std::endl;
2121 return r;
2122 }
2123 //cout << "final class_bucket: " << class_bucket << std::endl;
2124
2125 return 0;
2126 }
2127
2128 int CrushWrapper::get_new_bucket_id()
2129 {
2130 int id = -1;
2131 while (crush->buckets[-1-id] &&
2132 -1-id < crush->max_buckets) {
2133 id--;
2134 }
2135 if (-1-id == crush->max_buckets) {
2136 ++crush->max_buckets;
2137 crush->buckets = (struct crush_bucket**)realloc(
2138 crush->buckets,
2139 sizeof(crush->buckets[0]) * crush->max_buckets);
2140 for (auto& i : choose_args) {
2141 assert(i.second.size == (__u32)crush->max_buckets - 1);
2142 ++i.second.size;
2143 i.second.args = (struct crush_choose_arg*)realloc(
2144 i.second.args,
2145 sizeof(i.second.args[0]) * i.second.size);
2146 }
2147 }
2148 return id;
2149 }
2150
2151 void CrushWrapper::reweight(CephContext *cct)
2152 {
2153 set<int> roots;
2154 find_nonshadow_roots(&roots);
2155 for (auto id : roots) {
2156 if (id >= 0)
2157 continue;
2158 crush_bucket *b = get_bucket(id);
2159 ldout(cct, 5) << "reweight root bucket " << id << dendl;
2160 int r = crush_reweight_bucket(crush, b);
2161 ceph_assert(r == 0);
2162
2163 for (auto& i : choose_args) {
2164 //cout << "carg " << i.first << std::endl;
2165 vector<uint32_t> w; // discard top-level weights
2166 reweight_bucket(b, i.second, &w);
2167 }
2168 }
2169 int r = rebuild_roots_with_classes(cct);
2170 ceph_assert(r == 0);
2171 }
2172
2173 void CrushWrapper::reweight_bucket(
2174 crush_bucket *b,
2175 crush_choose_arg_map& arg_map,
2176 vector<uint32_t> *weightv)
2177 {
2178 int idx = -1 - b->id;
2179 unsigned npos = arg_map.args[idx].weight_set_positions;
2180 //cout << __func__ << " " << b->id << " npos " << npos << std::endl;
2181 weightv->resize(npos);
2182 for (unsigned i = 0; i < b->size; ++i) {
2183 int item = b->items[i];
2184 if (item >= 0) {
2185 for (unsigned pos = 0; pos < npos; ++pos) {
2186 (*weightv)[pos] += arg_map.args[idx].weight_set->weights[i];
2187 }
2188 } else {
2189 vector<uint32_t> subw(npos);
2190 crush_bucket *sub = get_bucket(item);
2191 assert(sub);
2192 reweight_bucket(sub, arg_map, &subw);
2193 for (unsigned pos = 0; pos < npos; ++pos) {
2194 (*weightv)[pos] += subw[pos];
2195 // strash the real bucket weight as the weights for this reference
2196 arg_map.args[idx].weight_set->weights[i] = subw[pos];
2197 }
2198 }
2199 }
2200 //cout << __func__ << " finish " << b->id << " " << *weightv << std::endl;
2201 }
2202
2203 int CrushWrapper::add_simple_rule_at(
2204 string name, string root_name,
2205 string failure_domain_name,
2206 string device_class,
2207 string mode, int rule_type,
2208 int rno,
2209 ostream *err)
2210 {
2211 if (rule_exists(name)) {
2212 if (err)
2213 *err << "rule " << name << " exists";
2214 return -EEXIST;
2215 }
2216 if (rno >= 0) {
2217 if (rule_exists(rno)) {
2218 if (err)
2219 *err << "rule with ruleno " << rno << " exists";
2220 return -EEXIST;
2221 }
2222 if (ruleset_exists(rno)) {
2223 if (err)
2224 *err << "ruleset " << rno << " exists";
2225 return -EEXIST;
2226 }
2227 } else {
2228 for (rno = 0; rno < get_max_rules(); rno++) {
2229 if (!rule_exists(rno) && !ruleset_exists(rno))
2230 break;
2231 }
2232 }
2233 if (!name_exists(root_name)) {
2234 if (err)
2235 *err << "root item " << root_name << " does not exist";
2236 return -ENOENT;
2237 }
2238 int root = get_item_id(root_name);
2239 int type = 0;
2240 if (failure_domain_name.length()) {
2241 type = get_type_id(failure_domain_name);
2242 if (type < 0) {
2243 if (err)
2244 *err << "unknown type " << failure_domain_name;
2245 return -EINVAL;
2246 }
2247 }
2248 if (device_class.size()) {
2249 if (!class_exists(device_class)) {
2250 if (err)
2251 *err << "device class " << device_class << " does not exist";
2252 return -EINVAL;
2253 }
2254 int c = get_class_id(device_class);
2255 if (class_bucket.count(root) == 0 ||
2256 class_bucket[root].count(c) == 0) {
2257 if (err)
2258 *err << "root " << root_name << " has no devices with class "
2259 << device_class;
2260 return -EINVAL;
2261 }
2262 root = class_bucket[root][c];
2263 }
2264 if (mode != "firstn" && mode != "indep") {
2265 if (err)
2266 *err << "unknown mode " << mode;
2267 return -EINVAL;
2268 }
2269
2270 int steps = 3;
2271 if (mode == "indep")
2272 steps = 5;
2273 int min_rep = mode == "firstn" ? 1 : 3;
2274 int max_rep = mode == "firstn" ? 10 : 20;
2275 //set the ruleset the same as rule_id(rno)
2276 crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_rep, max_rep);
2277 ceph_assert(rule);
2278 int step = 0;
2279 if (mode == "indep") {
2280 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
2281 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
2282 }
2283 crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
2284 if (type)
2285 crush_rule_set_step(rule, step++,
2286 mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
2287 CRUSH_RULE_CHOOSELEAF_INDEP,
2288 CRUSH_CHOOSE_N,
2289 type);
2290 else
2291 crush_rule_set_step(rule, step++,
2292 mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
2293 CRUSH_RULE_CHOOSE_INDEP,
2294 CRUSH_CHOOSE_N,
2295 0);
2296 crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
2297
2298 int ret = crush_add_rule(crush, rule, rno);
2299 if(ret < 0) {
2300 *err << "failed to add rule " << rno << " because " << cpp_strerror(ret);
2301 return ret;
2302 }
2303 set_rule_name(rno, name);
2304 have_rmaps = false;
2305 return rno;
2306 }
2307
2308 int CrushWrapper::add_simple_rule(
2309 string name, string root_name,
2310 string failure_domain_name,
2311 string device_class,
2312 string mode, int rule_type,
2313 ostream *err)
2314 {
2315 return add_simple_rule_at(name, root_name, failure_domain_name, device_class,
2316 mode,
2317 rule_type, -1, err);
2318 }
2319
2320 float CrushWrapper::_get_take_weight_osd_map(int root,
2321 map<int,float> *pmap) const
2322 {
2323 float sum = 0.0;
2324 list<int> q;
2325 q.push_back(root);
2326 //breadth first iterate the OSD tree
2327 while (!q.empty()) {
2328 int bno = q.front();
2329 q.pop_front();
2330 crush_bucket *b = crush->buckets[-1-bno];
2331 ceph_assert(b);
2332 for (unsigned j=0; j<b->size; ++j) {
2333 int item_id = b->items[j];
2334 if (item_id >= 0) { //it's an OSD
2335 float w = crush_get_bucket_item_weight(b, j);
2336 (*pmap)[item_id] = w;
2337 sum += w;
2338 } else { //not an OSD, expand the child later
2339 q.push_back(item_id);
2340 }
2341 }
2342 }
2343 return sum;
2344 }
2345
2346 void CrushWrapper::_normalize_weight_map(float sum,
2347 const map<int,float>& m,
2348 map<int,float> *pmap) const
2349 {
2350 for (auto& p : m) {
2351 map<int,float>::iterator q = pmap->find(p.first);
2352 if (q == pmap->end()) {
2353 (*pmap)[p.first] = p.second / sum;
2354 } else {
2355 q->second += p.second / sum;
2356 }
2357 }
2358 }
2359
2360 int CrushWrapper::get_take_weight_osd_map(int root, map<int,float> *pmap) const
2361 {
2362 map<int,float> m;
2363 float sum = _get_take_weight_osd_map(root, &m);
2364 _normalize_weight_map(sum, m, pmap);
2365 return 0;
2366 }
2367
2368 int CrushWrapper::get_rule_weight_osd_map(unsigned ruleno,
2369 map<int,float> *pmap) const
2370 {
2371 if (ruleno >= crush->max_rules)
2372 return -ENOENT;
2373 if (crush->rules[ruleno] == NULL)
2374 return -ENOENT;
2375 crush_rule *rule = crush->rules[ruleno];
2376
2377 // build a weight map for each TAKE in the rule, and then merge them
2378
2379 // FIXME: if there are multiple takes that place a different number of
2380 // objects we do not take that into account. (Also, note that doing this
2381 // right is also a function of the pool, since the crush rule
2382 // might choose 2 + choose 2 but pool size may only be 3.)
2383 for (unsigned i=0; i<rule->len; ++i) {
2384 map<int,float> m;
2385 float sum = 0;
2386 if (rule->steps[i].op == CRUSH_RULE_TAKE) {
2387 int n = rule->steps[i].arg1;
2388 if (n >= 0) {
2389 m[n] = 1.0;
2390 sum = 1.0;
2391 } else {
2392 sum += _get_take_weight_osd_map(n, &m);
2393 }
2394 }
2395 _normalize_weight_map(sum, m, pmap);
2396 }
2397
2398 return 0;
2399 }
2400
2401 int CrushWrapper::remove_rule(int ruleno)
2402 {
2403 if (ruleno >= (int)crush->max_rules)
2404 return -ENOENT;
2405 if (crush->rules[ruleno] == NULL)
2406 return -ENOENT;
2407 crush_destroy_rule(crush->rules[ruleno]);
2408 crush->rules[ruleno] = NULL;
2409 rule_name_map.erase(ruleno);
2410 have_rmaps = false;
2411 return rebuild_roots_with_classes(nullptr);
2412 }
2413
2414 int CrushWrapper::bucket_adjust_item_weight(
2415 CephContext *cct, crush_bucket *bucket, int item, int weight,
2416 bool adjust_weight_sets)
2417 {
2418 if (adjust_weight_sets) {
2419 unsigned position;
2420 for (position = 0; position < bucket->size; position++)
2421 if (bucket->items[position] == item)
2422 break;
2423 ceph_assert(position != bucket->size);
2424 for (auto &w : choose_args) {
2425 crush_choose_arg_map &arg_map = w.second;
2426 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
2427 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
2428 crush_weight_set *weight_set = &arg->weight_set[j];
2429 weight_set->weights[position] = weight;
2430 }
2431 }
2432 }
2433 return crush_bucket_adjust_item_weight(crush, bucket, item, weight);
2434 }
2435
2436 int CrushWrapper::add_bucket(
2437 int bucketno, int alg, int hash, int type, int size,
2438 int *items, int *weights, int *idout)
2439 {
2440 if (alg == 0) {
2441 alg = get_default_bucket_alg();
2442 if (alg == 0)
2443 return -EINVAL;
2444 }
2445 crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items,
2446 weights);
2447 ceph_assert(b);
2448 ceph_assert(idout);
2449 int r = crush_add_bucket(crush, bucketno, b, idout);
2450 int pos = -1 - *idout;
2451 for (auto& p : choose_args) {
2452 crush_choose_arg_map& cmap = p.second;
2453 unsigned new_size = crush->max_buckets;
2454 if (cmap.args) {
2455 if ((int)cmap.size < crush->max_buckets) {
2456 cmap.args = static_cast<crush_choose_arg*>(realloc(
2457 cmap.args,
2458 sizeof(crush_choose_arg) * new_size));
2459 ceph_assert(cmap.args);
2460 memset(&cmap.args[cmap.size], 0,
2461 sizeof(crush_choose_arg) * (new_size - cmap.size));
2462 cmap.size = new_size;
2463 }
2464 } else {
2465 cmap.args = static_cast<crush_choose_arg*>(calloc(sizeof(crush_choose_arg),
2466 new_size));
2467 ceph_assert(cmap.args);
2468 cmap.size = new_size;
2469 }
2470 if (size > 0) {
2471 int positions = get_choose_args_positions(cmap);
2472 crush_choose_arg& carg = cmap.args[pos];
2473 carg.weight_set = static_cast<crush_weight_set*>(calloc(sizeof(crush_weight_set),
2474 size));
2475 carg.weight_set_positions = positions;
2476 for (int ppos = 0; ppos < positions; ++ppos) {
2477 carg.weight_set[ppos].weights = (__u32*)calloc(sizeof(__u32), size);
2478 carg.weight_set[ppos].size = size;
2479 for (int bpos = 0; bpos < size; ++bpos) {
2480 carg.weight_set[ppos].weights[bpos] = weights[bpos];
2481 }
2482 }
2483 }
2484 assert(crush->max_buckets == (int)cmap.size);
2485 }
2486 return r;
2487 }
2488
2489 int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight)
2490 {
2491 __u32 new_size = bucket->size + 1;
2492 int r = crush_bucket_add_item(crush, bucket, item, weight);
2493 if (r < 0) {
2494 return r;
2495 }
2496 for (auto &w : choose_args) {
2497 crush_choose_arg_map &arg_map = w.second;
2498 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
2499 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
2500 crush_weight_set *weight_set = &arg->weight_set[j];
2501 weight_set->weights = (__u32*)realloc(weight_set->weights,
2502 new_size * sizeof(__u32));
2503 ceph_assert(weight_set->size + 1 == new_size);
2504 weight_set->weights[weight_set->size] = weight;
2505 weight_set->size = new_size;
2506 }
2507 if (arg->ids_size) {
2508 arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
2509 ceph_assert(arg->ids_size + 1 == new_size);
2510 arg->ids[arg->ids_size] = item;
2511 arg->ids_size = new_size;
2512 }
2513 }
2514 return 0;
2515 }
2516
2517 int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
2518 {
2519 __u32 new_size = bucket->size - 1;
2520 unsigned position;
2521 for (position = 0; position < bucket->size; position++)
2522 if (bucket->items[position] == item)
2523 break;
2524 ceph_assert(position != bucket->size);
2525 int r = crush_bucket_remove_item(crush, bucket, item);
2526 if (r < 0) {
2527 return r;
2528 }
2529 for (auto &w : choose_args) {
2530 crush_choose_arg_map &arg_map = w.second;
2531 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
2532 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
2533 crush_weight_set *weight_set = &arg->weight_set[j];
2534 ceph_assert(weight_set->size - 1 == new_size);
2535 for (__u32 k = position; k < new_size; k++)
2536 weight_set->weights[k] = weight_set->weights[k+1];
2537 if (new_size) {
2538 weight_set->weights = (__u32*)realloc(weight_set->weights,
2539 new_size * sizeof(__u32));
2540 } else {
2541 free(weight_set->weights);
2542 weight_set->weights = NULL;
2543 }
2544 weight_set->size = new_size;
2545 }
2546 if (arg->ids_size) {
2547 ceph_assert(arg->ids_size - 1 == new_size);
2548 for (__u32 k = position; k < new_size; k++)
2549 arg->ids[k] = arg->ids[k+1];
2550 if (new_size) {
2551 arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
2552 } else {
2553 free(arg->ids);
2554 arg->ids = NULL;
2555 }
2556 arg->ids_size = new_size;
2557 }
2558 }
2559 return 0;
2560 }
2561
2562 int CrushWrapper::bucket_set_alg(int bid, int alg)
2563 {
2564 crush_bucket *b = get_bucket(bid);
2565 if (!b) {
2566 return -ENOENT;
2567 }
2568 b->alg = alg;
2569 return 0;
2570 }
2571
2572 int CrushWrapper::update_device_class(int id,
2573 const string& class_name,
2574 const string& name,
2575 ostream *ss)
2576 {
2577 ceph_assert(item_exists(id));
2578 auto old_class_name = get_item_class(id);
2579 if (old_class_name && old_class_name != class_name) {
2580 *ss << "osd." << id << " has already bound to class '" << old_class_name
2581 << "', can not reset class to '" << class_name << "'; "
2582 << "use 'ceph osd crush rm-device-class <id>' to "
2583 << "remove old class first";
2584 return -EBUSY;
2585 }
2586
2587 int class_id = get_or_create_class_id(class_name);
2588 if (id < 0) {
2589 *ss << name << " id " << id << " is negative";
2590 return -EINVAL;
2591 }
2592
2593 if (class_map.count(id) != 0 && class_map[id] == class_id) {
2594 *ss << name << " already set to class " << class_name << ". ";
2595 return 0;
2596 }
2597
2598 set_item_class(id, class_id);
2599
2600 int r = rebuild_roots_with_classes(nullptr);
2601 if (r < 0)
2602 return r;
2603 return 1;
2604 }
2605
2606 int CrushWrapper::remove_device_class(CephContext *cct, int id, ostream *ss)
2607 {
2608 ceph_assert(ss);
2609 const char *name = get_item_name(id);
2610 if (!name) {
2611 *ss << "osd." << id << " does not have a name";
2612 return -ENOENT;
2613 }
2614
2615 const char *class_name = get_item_class(id);
2616 if (!class_name) {
2617 *ss << "osd." << id << " has not been bound to a specific class yet";
2618 return 0;
2619 }
2620 class_remove_item(id);
2621
2622 int r = rebuild_roots_with_classes(cct);
2623 if (r < 0) {
2624 *ss << "unable to rebuild roots with class '" << class_name << "' "
2625 << "of osd." << id << ": " << cpp_strerror(r);
2626 return r;
2627 }
2628 return 0;
2629 }
2630
2631 int CrushWrapper::device_class_clone(
2632 int original_id, int device_class,
2633 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
2634 const std::set<int32_t>& used_ids,
2635 int *clone,
2636 map<int,map<int,vector<int>>> *cmap_item_weight)
2637 {
2638 const char *item_name = get_item_name(original_id);
2639 if (item_name == NULL)
2640 return -ECHILD;
2641 const char *class_name = get_class_name(device_class);
2642 if (class_name == NULL)
2643 return -EBADF;
2644 string copy_name = item_name + string("~") + class_name;
2645 if (name_exists(copy_name)) {
2646 *clone = get_item_id(copy_name);
2647 return 0;
2648 }
2649
2650 crush_bucket *original = get_bucket(original_id);
2651 ceph_assert(!IS_ERR(original));
2652 crush_bucket *copy = crush_make_bucket(crush,
2653 original->alg,
2654 original->hash,
2655 original->type,
2656 0, NULL, NULL);
2657 ceph_assert(copy);
2658
2659 vector<unsigned> item_orig_pos; // new item pos -> orig item pos
2660 for (unsigned i = 0; i < original->size; i++) {
2661 int item = original->items[i];
2662 int weight = crush_get_bucket_item_weight(original, i);
2663 if (item >= 0) {
2664 if (class_map.count(item) != 0 && class_map[item] == device_class) {
2665 int res = crush_bucket_add_item(crush, copy, item, weight);
2666 if (res)
2667 return res;
2668 } else {
2669 continue;
2670 }
2671 } else {
2672 int child_copy_id;
2673 int res = device_class_clone(item, device_class, old_class_bucket,
2674 used_ids, &child_copy_id,
2675 cmap_item_weight);
2676 if (res < 0)
2677 return res;
2678 crush_bucket *child_copy = get_bucket(child_copy_id);
2679 ceph_assert(!IS_ERR(child_copy));
2680 res = crush_bucket_add_item(crush, copy, child_copy_id,
2681 child_copy->weight);
2682 if (res)
2683 return res;
2684 }
2685 item_orig_pos.push_back(i);
2686 }
2687 ceph_assert(item_orig_pos.size() == copy->size);
2688
2689 int bno = 0;
2690 if (old_class_bucket.count(original_id) &&
2691 old_class_bucket.at(original_id).count(device_class)) {
2692 bno = old_class_bucket.at(original_id).at(device_class);
2693 } else {
2694 // pick a new shadow bucket id that is not used by the current map
2695 // *or* any previous shadow buckets.
2696 bno = -1;
2697 while (((-1-bno) < crush->max_buckets && crush->buckets[-1-bno]) ||
2698 used_ids.count(bno)) {
2699 --bno;
2700 }
2701 }
2702 int res = crush_add_bucket(crush, bno, copy, clone);
2703 if (res)
2704 return res;
2705 ceph_assert(!bno || bno == *clone);
2706
2707 res = set_item_class(*clone, device_class);
2708 if (res < 0)
2709 return res;
2710
2711 // we do not use set_item_name because the name is intentionally invalid
2712 name_map[*clone] = copy_name;
2713 if (have_rmaps)
2714 name_rmap[copy_name] = *clone;
2715 class_bucket[original_id][device_class] = *clone;
2716
2717 // set up choose_args for the new bucket.
2718 for (auto& w : choose_args) {
2719 crush_choose_arg_map& cmap = w.second;
2720 if (crush->max_buckets > (int)cmap.size) {
2721 unsigned new_size = crush->max_buckets;
2722 cmap.args = static_cast<crush_choose_arg*>(realloc(cmap.args,
2723 new_size * sizeof(cmap.args[0])));
2724 ceph_assert(cmap.args);
2725 memset(cmap.args + cmap.size, 0,
2726 (new_size - cmap.size) * sizeof(cmap.args[0]));
2727 cmap.size = new_size;
2728 }
2729 auto& o = cmap.args[-1-original_id];
2730 auto& n = cmap.args[-1-bno];
2731 n.ids_size = 0; // FIXME: implement me someday
2732 n.weight_set_positions = o.weight_set_positions;
2733 n.weight_set = static_cast<crush_weight_set*>(calloc(
2734 n.weight_set_positions, sizeof(crush_weight_set)));
2735 for (size_t s = 0; s < n.weight_set_positions; ++s) {
2736 n.weight_set[s].size = copy->size;
2737 n.weight_set[s].weights = (__u32*)calloc(copy->size, sizeof(__u32));
2738 }
2739 for (size_t s = 0; s < n.weight_set_positions; ++s) {
2740 vector<int> bucket_weights(n.weight_set_positions);
2741 for (size_t i = 0; i < copy->size; ++i) {
2742 int item = copy->items[i];
2743 if (item >= 0) {
2744 n.weight_set[s].weights[i] = o.weight_set[s].weights[item_orig_pos[i]];
2745 } else if ((*cmap_item_weight)[w.first].count(item)) {
2746 n.weight_set[s].weights[i] = (*cmap_item_weight)[w.first][item][s];
2747 } else {
2748 n.weight_set[s].weights[i] = 0;
2749 }
2750 bucket_weights[s] += n.weight_set[s].weights[i];
2751 }
2752 (*cmap_item_weight)[w.first][bno] = bucket_weights;
2753 }
2754 }
2755 return 0;
2756 }
2757
2758 int CrushWrapper::get_rules_by_class(const string &class_name, set<int> *rules)
2759 {
2760 ceph_assert(rules);
2761 rules->clear();
2762 if (!class_exists(class_name)) {
2763 return -ENOENT;
2764 }
2765 int class_id = get_class_id(class_name);
2766 for (unsigned i = 0; i < crush->max_rules; ++i) {
2767 crush_rule *r = crush->rules[i];
2768 if (!r)
2769 continue;
2770 for (unsigned j = 0; j < r->len; ++j) {
2771 if (r->steps[j].op == CRUSH_RULE_TAKE) {
2772 int step_item = r->steps[j].arg1;
2773 int original_item;
2774 int c;
2775 int res = split_id_class(step_item, &original_item, &c);
2776 if (res < 0) {
2777 return res;
2778 }
2779 if (c != -1 && c == class_id) {
2780 rules->insert(i);
2781 break;
2782 }
2783 }
2784 }
2785 }
2786 return 0;
2787 }
2788
2789 // return rules that might reference the given osd
2790 int CrushWrapper::get_rules_by_osd(int osd, set<int> *rules)
2791 {
2792 ceph_assert(rules);
2793 rules->clear();
2794 if (osd < 0) {
2795 return -EINVAL;
2796 }
2797 for (unsigned i = 0; i < crush->max_rules; ++i) {
2798 crush_rule *r = crush->rules[i];
2799 if (!r)
2800 continue;
2801 for (unsigned j = 0; j < r->len; ++j) {
2802 if (r->steps[j].op == CRUSH_RULE_TAKE) {
2803 int step_item = r->steps[j].arg1;
2804 list<int> unordered;
2805 int rc = _get_leaves(step_item, &unordered);
2806 if (rc < 0) {
2807 return rc; // propagate fatal errors!
2808 }
2809 bool match = false;
2810 for (auto &o: unordered) {
2811 ceph_assert(o >= 0);
2812 if (o == osd) {
2813 match = true;
2814 break;
2815 }
2816 }
2817 if (match) {
2818 rules->insert(i);
2819 break;
2820 }
2821 }
2822 }
2823 }
2824 return 0;
2825 }
2826
2827 bool CrushWrapper::_class_is_dead(int class_id)
2828 {
2829 for (auto &p: class_map) {
2830 if (p.first >= 0 && p.second == class_id) {
2831 return false;
2832 }
2833 }
2834 for (unsigned i = 0; i < crush->max_rules; ++i) {
2835 crush_rule *r = crush->rules[i];
2836 if (!r)
2837 continue;
2838 for (unsigned j = 0; j < r->len; ++j) {
2839 if (r->steps[j].op == CRUSH_RULE_TAKE) {
2840 int root = r->steps[j].arg1;
2841 for (auto &p : class_bucket) {
2842 auto& q = p.second;
2843 if (q.count(class_id) && q[class_id] == root) {
2844 return false;
2845 }
2846 }
2847 }
2848 }
2849 }
2850 // no more referenced by any devices or crush rules
2851 return true;
2852 }
2853
2854 void CrushWrapper::cleanup_dead_classes()
2855 {
2856 auto p = class_name.begin();
2857 while (p != class_name.end()) {
2858 if (_class_is_dead(p->first)) {
2859 string n = p->second;
2860 ++p;
2861 remove_class_name(n);
2862 } else {
2863 ++p;
2864 }
2865 }
2866 }
2867
2868 int CrushWrapper::rebuild_roots_with_classes(CephContext *cct)
2869 {
2870 std::map<int32_t, map<int32_t, int32_t> > old_class_bucket = class_bucket;
2871 cleanup_dead_classes();
2872 int r = trim_roots_with_class(cct);
2873 if (r < 0)
2874 return r;
2875 class_bucket.clear();
2876 return populate_classes(old_class_bucket);
2877 }
2878
2879 void CrushWrapper::encode(bufferlist& bl, uint64_t features) const
2880 {
2881 using ceph::encode;
2882 ceph_assert(crush);
2883
2884 __u32 magic = CRUSH_MAGIC;
2885 encode(magic, bl);
2886
2887 encode(crush->max_buckets, bl);
2888 encode(crush->max_rules, bl);
2889 encode(crush->max_devices, bl);
2890
2891 bool encode_compat_choose_args = false;
2892 crush_choose_arg_map arg_map;
2893 memset(&arg_map, '\0', sizeof(arg_map));
2894 if (has_choose_args() &&
2895 !HAVE_FEATURE(features, CRUSH_CHOOSE_ARGS)) {
2896 ceph_assert(!has_incompat_choose_args());
2897 encode_compat_choose_args = true;
2898 arg_map = choose_args.begin()->second;
2899 }
2900
2901 // buckets
2902 for (int i=0; i<crush->max_buckets; i++) {
2903 __u32 alg = 0;
2904 if (crush->buckets[i]) alg = crush->buckets[i]->alg;
2905 encode(alg, bl);
2906 if (!alg)
2907 continue;
2908
2909 encode(crush->buckets[i]->id, bl);
2910 encode(crush->buckets[i]->type, bl);
2911 encode(crush->buckets[i]->alg, bl);
2912 encode(crush->buckets[i]->hash, bl);
2913 encode(crush->buckets[i]->weight, bl);
2914 encode(crush->buckets[i]->size, bl);
2915 for (unsigned j=0; j<crush->buckets[i]->size; j++)
2916 encode(crush->buckets[i]->items[j], bl);
2917
2918 switch (crush->buckets[i]->alg) {
2919 case CRUSH_BUCKET_UNIFORM:
2920 encode((reinterpret_cast<crush_bucket_uniform*>(crush->buckets[i]))->item_weight, bl);
2921 break;
2922
2923 case CRUSH_BUCKET_LIST:
2924 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
2925 encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->item_weights[j], bl);
2926 encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->sum_weights[j], bl);
2927 }
2928 break;
2929
2930 case CRUSH_BUCKET_TREE:
2931 encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes, bl);
2932 for (unsigned j=0; j<(reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes; j++)
2933 encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->node_weights[j], bl);
2934 break;
2935
2936 case CRUSH_BUCKET_STRAW:
2937 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
2938 encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->item_weights[j], bl);
2939 encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->straws[j], bl);
2940 }
2941 break;
2942
2943 case CRUSH_BUCKET_STRAW2:
2944 {
2945 __u32 *weights;
2946 if (encode_compat_choose_args &&
2947 arg_map.args[i].weight_set_positions > 0) {
2948 weights = arg_map.args[i].weight_set[0].weights;
2949 } else {
2950 weights = (reinterpret_cast<crush_bucket_straw2*>(crush->buckets[i]))->item_weights;
2951 }
2952 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
2953 encode(weights[j], bl);
2954 }
2955 }
2956 break;
2957
2958 default:
2959 ceph_abort();
2960 break;
2961 }
2962 }
2963
2964 // rules
2965 for (unsigned i=0; i<crush->max_rules; i++) {
2966 __u32 yes = crush->rules[i] ? 1:0;
2967 encode(yes, bl);
2968 if (!yes)
2969 continue;
2970
2971 encode(crush->rules[i]->len, bl);
2972 encode(crush->rules[i]->mask, bl);
2973 for (unsigned j=0; j<crush->rules[i]->len; j++)
2974 encode(crush->rules[i]->steps[j], bl);
2975 }
2976
2977 // name info
2978 encode(type_map, bl);
2979 encode(name_map, bl);
2980 encode(rule_name_map, bl);
2981
2982 // tunables
2983 encode(crush->choose_local_tries, bl);
2984 encode(crush->choose_local_fallback_tries, bl);
2985 encode(crush->choose_total_tries, bl);
2986 encode(crush->chooseleaf_descend_once, bl);
2987 encode(crush->chooseleaf_vary_r, bl);
2988 encode(crush->straw_calc_version, bl);
2989 encode(crush->allowed_bucket_algs, bl);
2990 if (features & CEPH_FEATURE_CRUSH_TUNABLES5) {
2991 encode(crush->chooseleaf_stable, bl);
2992 }
2993
2994 if (HAVE_FEATURE(features, SERVER_LUMINOUS)) {
2995 // device classes
2996 encode(class_map, bl);
2997 encode(class_name, bl);
2998 encode(class_bucket, bl);
2999
3000 // choose args
3001 __u32 size = (__u32)choose_args.size();
3002 encode(size, bl);
3003 for (auto c : choose_args) {
3004 encode(c.first, bl);
3005 crush_choose_arg_map arg_map = c.second;
3006 size = 0;
3007 for (__u32 i = 0; i < arg_map.size; i++) {
3008 crush_choose_arg *arg = &arg_map.args[i];
3009 if (arg->weight_set_positions == 0 &&
3010 arg->ids_size == 0)
3011 continue;
3012 size++;
3013 }
3014 encode(size, bl);
3015 for (__u32 i = 0; i < arg_map.size; i++) {
3016 crush_choose_arg *arg = &arg_map.args[i];
3017 if (arg->weight_set_positions == 0 &&
3018 arg->ids_size == 0)
3019 continue;
3020 encode(i, bl);
3021 encode(arg->weight_set_positions, bl);
3022 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
3023 crush_weight_set *weight_set = &arg->weight_set[j];
3024 encode(weight_set->size, bl);
3025 for (__u32 k = 0; k < weight_set->size; k++)
3026 encode(weight_set->weights[k], bl);
3027 }
3028 encode(arg->ids_size, bl);
3029 for (__u32 j = 0; j < arg->ids_size; j++)
3030 encode(arg->ids[j], bl);
3031 }
3032 }
3033 }
3034 }
3035
3036 static void decode_32_or_64_string_map(map<int32_t,string>& m, bufferlist::const_iterator& blp)
3037 {
3038 m.clear();
3039 __u32 n;
3040 decode(n, blp);
3041 while (n--) {
3042 __s32 key;
3043 decode(key, blp);
3044
3045 __u32 strlen;
3046 decode(strlen, blp);
3047 if (strlen == 0) {
3048 // der, key was actually 64-bits!
3049 decode(strlen, blp);
3050 }
3051 decode_nohead(strlen, m[key], blp);
3052 }
3053 }
3054
3055 void CrushWrapper::decode(bufferlist::const_iterator& blp)
3056 {
3057 using ceph::decode;
3058 create();
3059
3060 __u32 magic;
3061 decode(magic, blp);
3062 if (magic != CRUSH_MAGIC)
3063 throw buffer::malformed_input("bad magic number");
3064
3065 decode(crush->max_buckets, blp);
3066 decode(crush->max_rules, blp);
3067 decode(crush->max_devices, blp);
3068
3069 // legacy tunables, unless we decode something newer
3070 set_tunables_legacy();
3071
3072 try {
3073 // buckets
3074 crush->buckets = (crush_bucket**)calloc(1, crush->max_buckets * sizeof(crush_bucket*));
3075 for (int i=0; i<crush->max_buckets; i++) {
3076 decode_crush_bucket(&crush->buckets[i], blp);
3077 }
3078
3079 // rules
3080 crush->rules = (crush_rule**)calloc(1, crush->max_rules * sizeof(crush_rule*));
3081 for (unsigned i = 0; i < crush->max_rules; ++i) {
3082 __u32 yes;
3083 decode(yes, blp);
3084 if (!yes) {
3085 crush->rules[i] = NULL;
3086 continue;
3087 }
3088
3089 __u32 len;
3090 decode(len, blp);
3091 crush->rules[i] = reinterpret_cast<crush_rule*>(calloc(1, crush_rule_size(len)));
3092 crush->rules[i]->len = len;
3093 decode(crush->rules[i]->mask, blp);
3094 for (unsigned j=0; j<crush->rules[i]->len; j++)
3095 decode(crush->rules[i]->steps[j], blp);
3096 }
3097
3098 // name info
3099 // NOTE: we had a bug where we were incoding int instead of int32, which means the
3100 // 'key' field for these maps may be either 32 or 64 bits, depending. tolerate
3101 // both by assuming the string is always non-empty.
3102 decode_32_or_64_string_map(type_map, blp);
3103 decode_32_or_64_string_map(name_map, blp);
3104 decode_32_or_64_string_map(rule_name_map, blp);
3105
3106 // tunables
3107 if (!blp.end()) {
3108 decode(crush->choose_local_tries, blp);
3109 decode(crush->choose_local_fallback_tries, blp);
3110 decode(crush->choose_total_tries, blp);
3111 }
3112 if (!blp.end()) {
3113 decode(crush->chooseleaf_descend_once, blp);
3114 }
3115 if (!blp.end()) {
3116 decode(crush->chooseleaf_vary_r, blp);
3117 }
3118 if (!blp.end()) {
3119 decode(crush->straw_calc_version, blp);
3120 }
3121 if (!blp.end()) {
3122 decode(crush->allowed_bucket_algs, blp);
3123 }
3124 if (!blp.end()) {
3125 decode(crush->chooseleaf_stable, blp);
3126 }
3127 if (!blp.end()) {
3128 decode(class_map, blp);
3129 decode(class_name, blp);
3130 for (auto &c : class_name)
3131 class_rname[c.second] = c.first;
3132 decode(class_bucket, blp);
3133 }
3134 if (!blp.end()) {
3135 __u32 choose_args_size;
3136 decode(choose_args_size, blp);
3137 for (__u32 i = 0; i < choose_args_size; i++) {
3138 typename decltype(choose_args)::key_type choose_args_index;
3139 decode(choose_args_index, blp);
3140 crush_choose_arg_map arg_map;
3141 arg_map.size = crush->max_buckets;
3142 arg_map.args = static_cast<crush_choose_arg*>(calloc(
3143 arg_map.size, sizeof(crush_choose_arg)));
3144 __u32 size;
3145 decode(size, blp);
3146 for (__u32 j = 0; j < size; j++) {
3147 __u32 bucket_index;
3148 decode(bucket_index, blp);
3149 ceph_assert(bucket_index < arg_map.size);
3150 crush_choose_arg *arg = &arg_map.args[bucket_index];
3151 decode(arg->weight_set_positions, blp);
3152 if (arg->weight_set_positions) {
3153 arg->weight_set = static_cast<crush_weight_set*>(calloc(
3154 arg->weight_set_positions, sizeof(crush_weight_set)));
3155 for (__u32 k = 0; k < arg->weight_set_positions; k++) {
3156 crush_weight_set *weight_set = &arg->weight_set[k];
3157 decode(weight_set->size, blp);
3158 weight_set->weights = (__u32*)calloc(
3159 weight_set->size, sizeof(__u32));
3160 for (__u32 l = 0; l < weight_set->size; l++)
3161 decode(weight_set->weights[l], blp);
3162 }
3163 }
3164 decode(arg->ids_size, blp);
3165 if (arg->ids_size) {
3166 ceph_assert(arg->ids_size == crush->buckets[bucket_index]->size);
3167 arg->ids = (__s32 *)calloc(arg->ids_size, sizeof(__s32));
3168 for (__u32 k = 0; k < arg->ids_size; k++)
3169 decode(arg->ids[k], blp);
3170 }
3171 }
3172 choose_args[choose_args_index] = arg_map;
3173 }
3174 }
3175 update_choose_args(nullptr); // in case we decode a legacy "corrupted" map
3176 finalize();
3177 }
3178 catch (...) {
3179 crush_destroy(crush);
3180 throw;
3181 }
3182 }
3183
3184 void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::const_iterator &blp)
3185 {
3186 using ceph::decode;
3187 __u32 alg;
3188 decode(alg, blp);
3189 if (!alg) {
3190 *bptr = NULL;
3191 return;
3192 }
3193
3194 int size = 0;
3195 switch (alg) {
3196 case CRUSH_BUCKET_UNIFORM:
3197 size = sizeof(crush_bucket_uniform);
3198 break;
3199 case CRUSH_BUCKET_LIST:
3200 size = sizeof(crush_bucket_list);
3201 break;
3202 case CRUSH_BUCKET_TREE:
3203 size = sizeof(crush_bucket_tree);
3204 break;
3205 case CRUSH_BUCKET_STRAW:
3206 size = sizeof(crush_bucket_straw);
3207 break;
3208 case CRUSH_BUCKET_STRAW2:
3209 size = sizeof(crush_bucket_straw2);
3210 break;
3211 default:
3212 {
3213 char str[128];
3214 snprintf(str, sizeof(str), "unsupported bucket algorithm: %d", alg);
3215 throw buffer::malformed_input(str);
3216 }
3217 }
3218 crush_bucket *bucket = reinterpret_cast<crush_bucket*>(calloc(1, size));
3219 *bptr = bucket;
3220
3221 decode(bucket->id, blp);
3222 decode(bucket->type, blp);
3223 decode(bucket->alg, blp);
3224 decode(bucket->hash, blp);
3225 decode(bucket->weight, blp);
3226 decode(bucket->size, blp);
3227
3228 bucket->items = (__s32*)calloc(1, bucket->size * sizeof(__s32));
3229 for (unsigned j = 0; j < bucket->size; ++j) {
3230 decode(bucket->items[j], blp);
3231 }
3232
3233 switch (bucket->alg) {
3234 case CRUSH_BUCKET_UNIFORM:
3235 decode((reinterpret_cast<crush_bucket_uniform*>(bucket))->item_weight, blp);
3236 break;
3237
3238 case CRUSH_BUCKET_LIST: {
3239 crush_bucket_list* cbl = reinterpret_cast<crush_bucket_list*>(bucket);
3240 cbl->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3241 cbl->sum_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3242
3243 for (unsigned j = 0; j < bucket->size; ++j) {
3244 decode(cbl->item_weights[j], blp);
3245 decode(cbl->sum_weights[j], blp);
3246 }
3247 break;
3248 }
3249
3250 case CRUSH_BUCKET_TREE: {
3251 crush_bucket_tree* cbt = reinterpret_cast<crush_bucket_tree*>(bucket);
3252 decode(cbt->num_nodes, blp);
3253 cbt->node_weights = (__u32*)calloc(1, cbt->num_nodes * sizeof(__u32));
3254 for (unsigned j=0; j<cbt->num_nodes; j++) {
3255 decode(cbt->node_weights[j], blp);
3256 }
3257 break;
3258 }
3259
3260 case CRUSH_BUCKET_STRAW: {
3261 crush_bucket_straw* cbs = reinterpret_cast<crush_bucket_straw*>(bucket);
3262 cbs->straws = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3263 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3264 for (unsigned j = 0; j < bucket->size; ++j) {
3265 decode(cbs->item_weights[j], blp);
3266 decode(cbs->straws[j], blp);
3267 }
3268 break;
3269 }
3270
3271 case CRUSH_BUCKET_STRAW2: {
3272 crush_bucket_straw2* cbs = reinterpret_cast<crush_bucket_straw2*>(bucket);
3273 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3274 for (unsigned j = 0; j < bucket->size; ++j) {
3275 decode(cbs->item_weights[j], blp);
3276 }
3277 break;
3278 }
3279
3280 default:
3281 // We should have handled this case in the first switch statement
3282 ceph_abort();
3283 break;
3284 }
3285 }
3286
3287
3288 void CrushWrapper::dump(Formatter *f) const
3289 {
3290 f->open_array_section("devices");
3291 for (int i=0; i<get_max_devices(); i++) {
3292 f->open_object_section("device");
3293 f->dump_int("id", i);
3294 const char *n = get_item_name(i);
3295 if (n) {
3296 f->dump_string("name", n);
3297 } else {
3298 char name[20];
3299 sprintf(name, "device%d", i);
3300 f->dump_string("name", name);
3301 }
3302 const char *device_class = get_item_class(i);
3303 if (device_class != NULL)
3304 f->dump_string("class", device_class);
3305 f->close_section();
3306 }
3307 f->close_section();
3308
3309 f->open_array_section("types");
3310 int n = get_num_type_names();
3311 for (int i=0; n; i++) {
3312 const char *name = get_type_name(i);
3313 if (!name) {
3314 if (i == 0) {
3315 f->open_object_section("type");
3316 f->dump_int("type_id", 0);
3317 f->dump_string("name", "device");
3318 f->close_section();
3319 }
3320 continue;
3321 }
3322 n--;
3323 f->open_object_section("type");
3324 f->dump_int("type_id", i);
3325 f->dump_string("name", name);
3326 f->close_section();
3327 }
3328 f->close_section();
3329
3330 f->open_array_section("buckets");
3331 for (int bucket = -1; bucket > -1-get_max_buckets(); --bucket) {
3332 if (!bucket_exists(bucket))
3333 continue;
3334 f->open_object_section("bucket");
3335 f->dump_int("id", bucket);
3336 if (get_item_name(bucket))
3337 f->dump_string("name", get_item_name(bucket));
3338 f->dump_int("type_id", get_bucket_type(bucket));
3339 if (get_type_name(get_bucket_type(bucket)))
3340 f->dump_string("type_name", get_type_name(get_bucket_type(bucket)));
3341 f->dump_int("weight", get_bucket_weight(bucket));
3342 f->dump_string("alg", crush_bucket_alg_name(get_bucket_alg(bucket)));
3343 f->dump_string("hash", crush_hash_name(get_bucket_hash(bucket)));
3344 f->open_array_section("items");
3345 for (int j=0; j<get_bucket_size(bucket); j++) {
3346 f->open_object_section("item");
3347 f->dump_int("id", get_bucket_item(bucket, j));
3348 f->dump_int("weight", get_bucket_item_weight(bucket, j));
3349 f->dump_int("pos", j);
3350 f->close_section();
3351 }
3352 f->close_section();
3353 f->close_section();
3354 }
3355 f->close_section();
3356
3357 f->open_array_section("rules");
3358 dump_rules(f);
3359 f->close_section();
3360
3361 f->open_object_section("tunables");
3362 dump_tunables(f);
3363 f->close_section();
3364
3365 dump_choose_args(f);
3366 }
3367
3368 namespace {
3369 // depth first walker
3370 class TreeDumper {
3371 typedef CrushTreeDumper::Item Item;
3372 const CrushWrapper *crush;
3373 const CrushTreeDumper::name_map_t& weight_set_names;
3374 public:
3375 explicit TreeDumper(const CrushWrapper *crush,
3376 const CrushTreeDumper::name_map_t& wsnames)
3377 : crush(crush), weight_set_names(wsnames) {}
3378
3379 void dump(Formatter *f) {
3380 set<int> roots;
3381 crush->find_roots(&roots);
3382 for (set<int>::iterator root = roots.begin(); root != roots.end(); ++root) {
3383 dump_item(Item(*root, 0, 0, crush->get_bucket_weightf(*root)), f);
3384 }
3385 }
3386
3387 private:
3388 void dump_item(const Item& qi, Formatter* f) {
3389 if (qi.is_bucket()) {
3390 f->open_object_section("bucket");
3391 CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f);
3392 dump_bucket_children(qi, f);
3393 f->close_section();
3394 } else {
3395 f->open_object_section("device");
3396 CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f);
3397 f->close_section();
3398 }
3399 }
3400
3401 void dump_bucket_children(const Item& parent, Formatter* f) {
3402 f->open_array_section("items");
3403 const int max_pos = crush->get_bucket_size(parent.id);
3404 for (int pos = 0; pos < max_pos; pos++) {
3405 int id = crush->get_bucket_item(parent.id, pos);
3406 float weight = crush->get_bucket_item_weightf(parent.id, pos);
3407 dump_item(Item(id, parent.id, parent.depth + 1, weight), f);
3408 }
3409 f->close_section();
3410 }
3411 };
3412 }
3413
3414 void CrushWrapper::dump_tree(
3415 Formatter *f,
3416 const CrushTreeDumper::name_map_t& weight_set_names) const
3417 {
3418 ceph_assert(f);
3419 TreeDumper(this, weight_set_names).dump(f);
3420 }
3421
3422 void CrushWrapper::dump_tunables(Formatter *f) const
3423 {
3424 f->dump_int("choose_local_tries", get_choose_local_tries());
3425 f->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries());
3426 f->dump_int("choose_total_tries", get_choose_total_tries());
3427 f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
3428 f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
3429 f->dump_int("chooseleaf_stable", get_chooseleaf_stable());
3430 f->dump_int("straw_calc_version", get_straw_calc_version());
3431 f->dump_int("allowed_bucket_algs", get_allowed_bucket_algs());
3432
3433 // be helpful about it
3434 if (has_jewel_tunables())
3435 f->dump_string("profile", "jewel");
3436 else if (has_hammer_tunables())
3437 f->dump_string("profile", "hammer");
3438 else if (has_firefly_tunables())
3439 f->dump_string("profile", "firefly");
3440 else if (has_bobtail_tunables())
3441 f->dump_string("profile", "bobtail");
3442 else if (has_argonaut_tunables())
3443 f->dump_string("profile", "argonaut");
3444 else
3445 f->dump_string("profile", "unknown");
3446 f->dump_int("optimal_tunables", (int)has_optimal_tunables());
3447 f->dump_int("legacy_tunables", (int)has_legacy_tunables());
3448
3449 // be helpful about minimum version required
3450 f->dump_string("minimum_required_version", get_min_required_version());
3451
3452 f->dump_int("require_feature_tunables", (int)has_nondefault_tunables());
3453 f->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2());
3454 f->dump_int("has_v2_rules", (int)has_v2_rules());
3455 f->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3());
3456 f->dump_int("has_v3_rules", (int)has_v3_rules());
3457 f->dump_int("has_v4_buckets", (int)has_v4_buckets());
3458 f->dump_int("require_feature_tunables5", (int)has_nondefault_tunables5());
3459 f->dump_int("has_v5_rules", (int)has_v5_rules());
3460 }
3461
3462 void CrushWrapper::dump_choose_args(Formatter *f) const
3463 {
3464 f->open_object_section("choose_args");
3465 for (auto c : choose_args) {
3466 crush_choose_arg_map arg_map = c.second;
3467 f->open_array_section(stringify(c.first).c_str());
3468 for (__u32 i = 0; i < arg_map.size; i++) {
3469 crush_choose_arg *arg = &arg_map.args[i];
3470 if (arg->weight_set_positions == 0 &&
3471 arg->ids_size == 0)
3472 continue;
3473 f->open_object_section("choose_args");
3474 int bucket_index = i;
3475 f->dump_int("bucket_id", -1-bucket_index);
3476 if (arg->weight_set_positions > 0) {
3477 f->open_array_section("weight_set");
3478 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
3479 f->open_array_section("weights");
3480 __u32 *weights = arg->weight_set[j].weights;
3481 __u32 size = arg->weight_set[j].size;
3482 for (__u32 k = 0; k < size; k++) {
3483 f->dump_float("weight", (float)weights[k]/(float)0x10000);
3484 }
3485 f->close_section();
3486 }
3487 f->close_section();
3488 }
3489 if (arg->ids_size > 0) {
3490 f->open_array_section("ids");
3491 for (__u32 j = 0; j < arg->ids_size; j++)
3492 f->dump_int("id", arg->ids[j]);
3493 f->close_section();
3494 }
3495 f->close_section();
3496 }
3497 f->close_section();
3498 }
3499 f->close_section();
3500 }
3501
3502 void CrushWrapper::dump_rules(Formatter *f) const
3503 {
3504 for (int i=0; i<get_max_rules(); i++) {
3505 if (!rule_exists(i))
3506 continue;
3507 dump_rule(i, f);
3508 }
3509 }
3510
3511 void CrushWrapper::dump_rule(int ruleset, Formatter *f) const
3512 {
3513 f->open_object_section("rule");
3514 f->dump_int("rule_id", ruleset);
3515 if (get_rule_name(ruleset))
3516 f->dump_string("rule_name", get_rule_name(ruleset));
3517 f->dump_int("ruleset", get_rule_mask_ruleset(ruleset));
3518 f->dump_int("type", get_rule_mask_type(ruleset));
3519 f->dump_int("min_size", get_rule_mask_min_size(ruleset));
3520 f->dump_int("max_size", get_rule_mask_max_size(ruleset));
3521 f->open_array_section("steps");
3522 for (int j=0; j<get_rule_len(ruleset); j++) {
3523 f->open_object_section("step");
3524 switch (get_rule_op(ruleset, j)) {
3525 case CRUSH_RULE_NOOP:
3526 f->dump_string("op", "noop");
3527 break;
3528 case CRUSH_RULE_TAKE:
3529 f->dump_string("op", "take");
3530 {
3531 int item = get_rule_arg1(ruleset, j);
3532 f->dump_int("item", item);
3533
3534 const char *name = get_item_name(item);
3535 f->dump_string("item_name", name ? name : "");
3536 }
3537 break;
3538 case CRUSH_RULE_EMIT:
3539 f->dump_string("op", "emit");
3540 break;
3541 case CRUSH_RULE_CHOOSE_FIRSTN:
3542 f->dump_string("op", "choose_firstn");
3543 f->dump_int("num", get_rule_arg1(ruleset, j));
3544 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
3545 break;
3546 case CRUSH_RULE_CHOOSE_INDEP:
3547 f->dump_string("op", "choose_indep");
3548 f->dump_int("num", get_rule_arg1(ruleset, j));
3549 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
3550 break;
3551 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
3552 f->dump_string("op", "chooseleaf_firstn");
3553 f->dump_int("num", get_rule_arg1(ruleset, j));
3554 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
3555 break;
3556 case CRUSH_RULE_CHOOSELEAF_INDEP:
3557 f->dump_string("op", "chooseleaf_indep");
3558 f->dump_int("num", get_rule_arg1(ruleset, j));
3559 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
3560 break;
3561 case CRUSH_RULE_SET_CHOOSE_TRIES:
3562 f->dump_string("op", "set_choose_tries");
3563 f->dump_int("num", get_rule_arg1(ruleset, j));
3564 break;
3565 case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
3566 f->dump_string("op", "set_chooseleaf_tries");
3567 f->dump_int("num", get_rule_arg1(ruleset, j));
3568 break;
3569 default:
3570 f->dump_int("opcode", get_rule_op(ruleset, j));
3571 f->dump_int("arg1", get_rule_arg1(ruleset, j));
3572 f->dump_int("arg2", get_rule_arg2(ruleset, j));
3573 }
3574 f->close_section();
3575 }
3576 f->close_section();
3577 f->close_section();
3578 }
3579
3580 void CrushWrapper::list_rules(Formatter *f) const
3581 {
3582 for (int rule = 0; rule < get_max_rules(); rule++) {
3583 if (!rule_exists(rule))
3584 continue;
3585 f->dump_string("name", get_rule_name(rule));
3586 }
3587 }
3588
3589 void CrushWrapper::list_rules(ostream *ss) const
3590 {
3591 for (int rule = 0; rule < get_max_rules(); rule++) {
3592 if (!rule_exists(rule))
3593 continue;
3594 *ss << get_rule_name(rule) << "\n";
3595 }
3596 }
3597
3598 class CrushTreePlainDumper : public CrushTreeDumper::Dumper<TextTable> {
3599 public:
3600 typedef CrushTreeDumper::Dumper<TextTable> Parent;
3601
3602 explicit CrushTreePlainDumper(const CrushWrapper *crush,
3603 const CrushTreeDumper::name_map_t& wsnames)
3604 : Parent(crush, wsnames) {}
3605 explicit CrushTreePlainDumper(const CrushWrapper *crush,
3606 const CrushTreeDumper::name_map_t& wsnames,
3607 bool show_shadow)
3608 : Parent(crush, wsnames, show_shadow) {}
3609
3610
3611 void dump(TextTable *tbl) {
3612 tbl->define_column("ID", TextTable::LEFT, TextTable::RIGHT);
3613 tbl->define_column("CLASS", TextTable::LEFT, TextTable::RIGHT);
3614 tbl->define_column("WEIGHT", TextTable::LEFT, TextTable::RIGHT);
3615 for (auto& p : crush->choose_args) {
3616 if (p.first == CrushWrapper::DEFAULT_CHOOSE_ARGS) {
3617 tbl->define_column("(compat)", TextTable::LEFT, TextTable::RIGHT);
3618 } else {
3619 string name;
3620 auto q = weight_set_names.find(p.first);
3621 name = q != weight_set_names.end() ? q->second :
3622 stringify(p.first);
3623 tbl->define_column(name.c_str(), TextTable::LEFT, TextTable::RIGHT);
3624 }
3625 }
3626 tbl->define_column("TYPE NAME", TextTable::LEFT, TextTable::LEFT);
3627 Parent::dump(tbl);
3628 }
3629
3630 protected:
3631 void dump_item(const CrushTreeDumper::Item &qi, TextTable *tbl) override {
3632 const char *c = crush->get_item_class(qi.id);
3633 if (!c)
3634 c = "";
3635 *tbl << qi.id
3636 << c
3637 << weightf_t(qi.weight);
3638 for (auto& p : crush->choose_args) {
3639 if (qi.parent < 0) {
3640 const crush_choose_arg_map cmap = crush->choose_args_get(p.first);
3641 int bidx = -1 - qi.parent;
3642 const crush_bucket *b = crush->get_bucket(qi.parent);
3643 if (b &&
3644 bidx < (int)cmap.size &&
3645 cmap.args[bidx].weight_set &&
3646 cmap.args[bidx].weight_set_positions >= 1) {
3647 int pos;
3648 for (pos = 0;
3649 pos < (int)cmap.args[bidx].weight_set[0].size &&
3650 b->items[pos] != qi.id;
3651 ++pos) ;
3652 *tbl << weightf_t((float)cmap.args[bidx].weight_set[0].weights[pos] /
3653 (float)0x10000);
3654 continue;
3655 }
3656 }
3657 *tbl << "";
3658 }
3659 ostringstream ss;
3660 for (int k=0; k < qi.depth; k++) {
3661 ss << " ";
3662 }
3663 if (qi.is_bucket()) {
3664 ss << crush->get_type_name(crush->get_bucket_type(qi.id)) << " "
3665 << crush->get_item_name(qi.id);
3666 } else {
3667 ss << "osd." << qi.id;
3668 }
3669 *tbl << ss.str();
3670 *tbl << TextTable::endrow;
3671 }
3672 };
3673
3674
3675 class CrushTreeFormattingDumper : public CrushTreeDumper::FormattingDumper {
3676 public:
3677 typedef CrushTreeDumper::FormattingDumper Parent;
3678
3679 explicit CrushTreeFormattingDumper(
3680 const CrushWrapper *crush,
3681 const CrushTreeDumper::name_map_t& wsnames)
3682 : Parent(crush, wsnames) {}
3683
3684 explicit CrushTreeFormattingDumper(
3685 const CrushWrapper *crush,
3686 const CrushTreeDumper::name_map_t& wsnames,
3687 bool show_shadow)
3688 : Parent(crush, wsnames, show_shadow) {}
3689
3690 void dump(Formatter *f) {
3691 f->open_array_section("nodes");
3692 Parent::dump(f);
3693 f->close_section();
3694
3695 // There is no stray bucket whose id is a negative number, so just get
3696 // the max_id and iterate from 0 to max_id to dump stray osds.
3697 f->open_array_section("stray");
3698 int32_t max_id = -1;
3699 if (!crush->name_map.empty()) {
3700 max_id = crush->name_map.rbegin()->first;
3701 }
3702 for (int32_t i = 0; i <= max_id; i++) {
3703 if (crush->item_exists(i) && !is_touched(i) && should_dump(i)) {
3704 dump_item(CrushTreeDumper::Item(i, 0, 0, 0), f);
3705 }
3706 }
3707 f->close_section();
3708 }
3709 };
3710
3711
3712 void CrushWrapper::dump_tree(
3713 ostream *out,
3714 Formatter *f,
3715 const CrushTreeDumper::name_map_t& weight_set_names,
3716 bool show_shadow) const
3717 {
3718 if (out) {
3719 TextTable tbl;
3720 CrushTreePlainDumper(this, weight_set_names, show_shadow).dump(&tbl);
3721 *out << tbl;
3722 }
3723 if (f) {
3724 CrushTreeFormattingDumper(this, weight_set_names, show_shadow).dump(f);
3725 }
3726 }
3727
3728 void CrushWrapper::generate_test_instances(list<CrushWrapper*>& o)
3729 {
3730 o.push_back(new CrushWrapper);
3731 // fixme
3732 }
3733
3734 /**
3735 * Determine the default CRUSH ruleset ID to be used with
3736 * newly created replicated pools.
3737 *
3738 * @returns a ruleset ID (>=0) or -1 if no suitable ruleset found
3739 */
3740 int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext *cct)
3741 {
3742 int crush_ruleset = cct->_conf.get_val<int64_t>("osd_pool_default_crush_rule");
3743 if (crush_ruleset < 0) {
3744 crush_ruleset = find_first_ruleset(pg_pool_t::TYPE_REPLICATED);
3745 } else if (!ruleset_exists(crush_ruleset)) {
3746 crush_ruleset = -1; // match find_first_ruleset() retval
3747 }
3748 return crush_ruleset;
3749 }
3750
3751 bool CrushWrapper::is_valid_crush_name(const string& s)
3752 {
3753 if (s.empty())
3754 return false;
3755 for (string::const_iterator p = s.begin(); p != s.end(); ++p) {
3756 if (!(*p == '-') &&
3757 !(*p == '_') &&
3758 !(*p == '.') &&
3759 !(*p >= '0' && *p <= '9') &&
3760 !(*p >= 'A' && *p <= 'Z') &&
3761 !(*p >= 'a' && *p <= 'z'))
3762 return false;
3763 }
3764 return true;
3765 }
3766
3767 bool CrushWrapper::is_valid_crush_loc(CephContext *cct,
3768 const map<string,string>& loc)
3769 {
3770 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
3771 if (!is_valid_crush_name(l->first) ||
3772 !is_valid_crush_name(l->second)) {
3773 ldout(cct, 1) << "loc["
3774 << l->first << "] = '"
3775 << l->second << "' not a valid crush name ([A-Za-z0-9_-.]+)"
3776 << dendl;
3777 return false;
3778 }
3779 }
3780 return true;
3781 }
3782
3783 int CrushWrapper::_choose_type_stack(
3784 CephContext *cct,
3785 const vector<pair<int,int>>& stack,
3786 const set<int>& overfull,
3787 const vector<int>& underfull,
3788 const vector<int>& more_underfull,
3789 const vector<int>& orig,
3790 vector<int>::const_iterator& i,
3791 set<int>& used,
3792 vector<int> *pw,
3793 int root_bucket,
3794 int rule) const
3795 {
3796 vector<int> w = *pw;
3797 vector<int> o;
3798
3799 ldout(cct, 10) << __func__ << " stack " << stack
3800 << " orig " << orig
3801 << " at " << *i
3802 << " pw " << *pw
3803 << dendl;
3804 ceph_assert(root_bucket < 0);
3805 vector<int> cumulative_fanout(stack.size());
3806 int f = 1;
3807 for (int j = (int)stack.size() - 1; j >= 0; --j) {
3808 cumulative_fanout[j] = f;
3809 f *= stack[j].second;
3810 }
3811 ldout(cct, 10) << __func__ << " cumulative_fanout " << cumulative_fanout
3812 << dendl;
3813
3814 // identify underfull targets for each intermediate level.
3815 // this serves two purposes:
3816 // 1. we can tell when we are selecting a bucket that does not have any underfull
3817 // devices beneath it. that means that if the current input includes an overfull
3818 // device, we won't be able to find an underfull device with this parent to
3819 // swap for it.
3820 // 2. when we decide we should reject a bucket due to the above, this list gives us
3821 // a list of peers to consider that *do* have underfull devices available.. (we
3822 // are careful to pick one that has the same parent.)
3823 vector<set<int>> underfull_buckets; // level -> set of buckets with >0 underfull item(s)
3824 underfull_buckets.resize(stack.size() - 1);
3825 for (auto osd : underfull) {
3826 int item = osd;
3827 for (int j = (int)stack.size() - 2; j >= 0; --j) {
3828 int type = stack[j].first;
3829 item = get_parent_of_type(item, type, rule);
3830 ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type
3831 << " is " << item << dendl;
3832 if (!subtree_contains(root_bucket, item)) {
3833 ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl;
3834 continue;
3835 }
3836 underfull_buckets[j].insert(item);
3837 }
3838 }
3839 ldout(cct, 20) << __func__ << " underfull_buckets " << underfull_buckets << dendl;
3840
3841 for (unsigned j = 0; j < stack.size(); ++j) {
3842 int type = stack[j].first;
3843 int fanout = stack[j].second;
3844 int cum_fanout = cumulative_fanout[j];
3845 ldout(cct, 10) << " level " << j << ": type " << type << " fanout " << fanout
3846 << " cumulative " << cum_fanout
3847 << " w " << w << dendl;
3848 vector<int> o;
3849 auto tmpi = i;
3850 if (i == orig.end()) {
3851 ldout(cct, 10) << __func__ << " end of orig, break 0" << dendl;
3852 break;
3853 }
3854 for (auto from : w) {
3855 ldout(cct, 10) << " from " << from << dendl;
3856 // identify leaves under each choice. we use this to check whether any of these
3857 // leaves are overfull. (if so, we need to make sure there are underfull candidates
3858 // to swap for them.)
3859 vector<set<int>> leaves;
3860 leaves.resize(fanout);
3861 for (int pos = 0; pos < fanout; ++pos) {
3862 if (type > 0) {
3863 // non-leaf
3864 int item = get_parent_of_type(*tmpi, type, rule);
3865 o.push_back(item);
3866 int n = cum_fanout;
3867 while (n-- && tmpi != orig.end()) {
3868 leaves[pos].insert(*tmpi++);
3869 }
3870 ldout(cct, 10) << __func__ << " from " << *tmpi << " got " << item
3871 << " of type " << type << " over leaves " << leaves[pos] << dendl;
3872 } else {
3873 // leaf
3874 bool replaced = false;
3875 if (overfull.count(*i)) {
3876 for (auto item : underfull) {
3877 ldout(cct, 10) << __func__ << " pos " << pos
3878 << " was " << *i << " considering " << item
3879 << dendl;
3880 if (used.count(item)) {
3881 ldout(cct, 20) << __func__ << " in used " << used << dendl;
3882 continue;
3883 }
3884 if (!subtree_contains(from, item)) {
3885 ldout(cct, 20) << __func__ << " not in subtree " << from << dendl;
3886 continue;
3887 }
3888 if (std::find(orig.begin(), orig.end(), item) != orig.end()) {
3889 ldout(cct, 20) << __func__ << " in orig " << orig << dendl;
3890 continue;
3891 }
3892 o.push_back(item);
3893 used.insert(item);
3894 ldout(cct, 10) << __func__ << " pos " << pos << " replace "
3895 << *i << " -> " << item << dendl;
3896 replaced = true;
3897 ceph_assert(i != orig.end());
3898 ++i;
3899 break;
3900 }
3901 if (!replaced) {
3902 for (auto item : more_underfull) {
3903 ldout(cct, 10) << __func__ << " more underfull pos " << pos
3904 << " was " << *i << " considering " << item
3905 << dendl;
3906 if (used.count(item)) {
3907 ldout(cct, 20) << __func__ << " in used " << used << dendl;
3908 continue;
3909 }
3910 if (!subtree_contains(from, item)) {
3911 ldout(cct, 20) << __func__ << " not in subtree " << from << dendl;
3912 continue;
3913 }
3914 if (std::find(orig.begin(), orig.end(), item) != orig.end()) {
3915 ldout(cct, 20) << __func__ << " in orig " << orig << dendl;
3916 continue;
3917 }
3918 o.push_back(item);
3919 used.insert(item);
3920 ldout(cct, 10) << __func__ << " pos " << pos << " replace "
3921 << *i << " -> " << item << dendl;
3922 replaced = true;
3923 assert(i != orig.end());
3924 ++i;
3925 break;
3926 }
3927 }
3928 }
3929 if (!replaced) {
3930 ldout(cct, 10) << __func__ << " pos " << pos << " keep " << *i
3931 << dendl;
3932 ceph_assert(i != orig.end());
3933 o.push_back(*i);
3934 ++i;
3935 }
3936 if (i == orig.end()) {
3937 ldout(cct, 10) << __func__ << " end of orig, break 1" << dendl;
3938 break;
3939 }
3940 }
3941 }
3942 if (j + 1 < stack.size()) {
3943 // check if any buckets have overfull leaves but no underfull candidates
3944 for (int pos = 0; pos < fanout; ++pos) {
3945 if (underfull_buckets[j].count(o[pos]) == 0) {
3946 // are any leaves overfull?
3947 bool any_overfull = false;
3948 for (auto osd : leaves[pos]) {
3949 if (overfull.count(osd)) {
3950 any_overfull = true;
3951 break;
3952 }
3953 }
3954 if (any_overfull) {
3955 ldout(cct, 10) << " bucket " << o[pos] << " has no underfull targets and "
3956 << ">0 leaves " << leaves[pos] << " is overfull; alts "
3957 << underfull_buckets[j]
3958 << dendl;
3959 for (auto alt : underfull_buckets[j]) {
3960 if (std::find(o.begin(), o.end(), alt) == o.end()) {
3961 // see if alt has the same parent
3962 if (j == 0 ||
3963 get_parent_of_type(o[pos], stack[j-1].first, rule) ==
3964 get_parent_of_type(alt, stack[j-1].first, rule)) {
3965 if (j)
3966 ldout(cct, 10) << " replacing " << o[pos]
3967 << " (which has no underfull leaves) with " << alt
3968 << " (same parent "
3969 << get_parent_of_type(alt, stack[j-1].first, rule) << " type "
3970 << type << ")" << dendl;
3971 else
3972 ldout(cct, 10) << " replacing " << o[pos]
3973 << " (which has no underfull leaves) with " << alt
3974 << " (first level)" << dendl;
3975 o[pos] = alt;
3976 break;
3977 } else {
3978 ldout(cct, 30) << " alt " << alt << " for " << o[pos]
3979 << " has different parent, skipping" << dendl;
3980 }
3981 }
3982 }
3983 }
3984 }
3985 }
3986 }
3987 if (i == orig.end()) {
3988 ldout(cct, 10) << __func__ << " end of orig, break 2" << dendl;
3989 break;
3990 }
3991 }
3992 ldout(cct, 10) << __func__ << " w <- " << o << " was " << w << dendl;
3993 w.swap(o);
3994 }
3995 *pw = w;
3996 return 0;
3997 }
3998
3999 int CrushWrapper::try_remap_rule(
4000 CephContext *cct,
4001 int ruleno,
4002 int maxout,
4003 const set<int>& overfull,
4004 const vector<int>& underfull,
4005 const vector<int>& more_underfull,
4006 const vector<int>& orig,
4007 vector<int> *out) const
4008 {
4009 const crush_map *map = crush;
4010 const crush_rule *rule = get_rule(ruleno);
4011 ceph_assert(rule);
4012
4013 ldout(cct, 10) << __func__ << " ruleno " << ruleno
4014 << " numrep " << maxout << " overfull " << overfull
4015 << " underfull " << underfull
4016 << " more_underfull " << more_underfull
4017 << " orig " << orig
4018 << dendl;
4019 vector<int> w; // working set
4020 out->clear();
4021
4022 auto i = orig.begin();
4023 set<int> used;
4024
4025 vector<pair<int,int>> type_stack; // (type, fan-out)
4026 int root_bucket = 0;
4027 for (unsigned step = 0; step < rule->len; ++step) {
4028 const crush_rule_step *curstep = &rule->steps[step];
4029 ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl;
4030 switch (curstep->op) {
4031 case CRUSH_RULE_TAKE:
4032 if ((curstep->arg1 >= 0 && curstep->arg1 < map->max_devices) ||
4033 (-1-curstep->arg1 >= 0 && -1-curstep->arg1 < map->max_buckets &&
4034 map->buckets[-1-curstep->arg1])) {
4035 w.clear();
4036 w.push_back(curstep->arg1);
4037 root_bucket = curstep->arg1;
4038 ldout(cct, 10) << __func__ << " take " << w << dendl;
4039 } else {
4040 ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl;
4041 }
4042 break;
4043
4044 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
4045 case CRUSH_RULE_CHOOSELEAF_INDEP:
4046 {
4047 int numrep = curstep->arg1;
4048 int type = curstep->arg2;
4049 if (numrep <= 0)
4050 numrep += maxout;
4051 type_stack.push_back(make_pair(type, numrep));
4052 if (type > 0)
4053 type_stack.push_back(make_pair(0, 1));
4054 int r = _choose_type_stack(cct, type_stack, overfull, underfull, more_underfull, orig,
4055 i, used, &w, root_bucket, ruleno);
4056 if (r < 0)
4057 return r;
4058 type_stack.clear();
4059 }
4060 break;
4061
4062 case CRUSH_RULE_CHOOSE_FIRSTN:
4063 case CRUSH_RULE_CHOOSE_INDEP:
4064 {
4065 int numrep = curstep->arg1;
4066 int type = curstep->arg2;
4067 if (numrep <= 0)
4068 numrep += maxout;
4069 type_stack.push_back(make_pair(type, numrep));
4070 }
4071 break;
4072
4073 case CRUSH_RULE_EMIT:
4074 ldout(cct, 10) << " emit " << w << dendl;
4075 if (!type_stack.empty()) {
4076 int r = _choose_type_stack(cct, type_stack, overfull, underfull, more_underfull, orig,
4077 i, used, &w, root_bucket, ruleno);
4078 if (r < 0)
4079 return r;
4080 type_stack.clear();
4081 }
4082 for (auto item : w) {
4083 out->push_back(item);
4084 }
4085 w.clear();
4086 break;
4087
4088 default:
4089 // ignore
4090 break;
4091 }
4092 }
4093
4094 return 0;
4095 }
4096
4097
4098 int CrushWrapper::_choose_args_adjust_item_weight_in_bucket(
4099 CephContext *cct,
4100 crush_choose_arg_map cmap,
4101 int bucketid,
4102 int id,
4103 const vector<int>& weight,
4104 ostream *ss)
4105 {
4106 int changed = 0;
4107 int bidx = -1 - bucketid;
4108 crush_bucket *b = crush->buckets[bidx];
4109 if (bidx >= (int)cmap.size) {
4110 if (ss)
4111 *ss << "no weight-set for bucket " << b->id;
4112 ldout(cct, 10) << __func__ << " no crush_choose_arg for bucket " << b->id
4113 << dendl;
4114 return 0;
4115 }
4116 crush_choose_arg *carg = &cmap.args[bidx];
4117 if (carg->weight_set == NULL) {
4118 // create a weight-set for this bucket and populate it with the
4119 // bucket weights
4120 unsigned positions = get_choose_args_positions(cmap);
4121 carg->weight_set_positions = positions;
4122 carg->weight_set = static_cast<crush_weight_set*>(
4123 calloc(sizeof(crush_weight_set), positions));
4124 for (unsigned p = 0; p < positions; ++p) {
4125 carg->weight_set[p].size = b->size;
4126 carg->weight_set[p].weights = (__u32*)calloc(b->size, sizeof(__u32));
4127 for (unsigned i = 0; i < b->size; ++i) {
4128 carg->weight_set[p].weights[i] = crush_get_bucket_item_weight(b, i);
4129 }
4130 }
4131 changed++;
4132 }
4133 if (carg->weight_set_positions != weight.size()) {
4134 if (ss)
4135 *ss << "weight_set_positions != " << weight.size() << " for bucket " << b->id;
4136 ldout(cct, 10) << __func__ << " weight_set_positions != " << weight.size()
4137 << " for bucket " << b->id << dendl;
4138 return 0;
4139 }
4140 for (unsigned i = 0; i < b->size; i++) {
4141 if (b->items[i] == id) {
4142 for (unsigned j = 0; j < weight.size(); ++j) {
4143 carg->weight_set[j].weights[i] = weight[j];
4144 }
4145 ldout(cct, 5) << __func__ << " set " << id << " to " << weight
4146 << " in bucket " << b->id << dendl;
4147 changed++;
4148 }
4149 }
4150 if (changed) {
4151 vector<int> bucket_weight(weight.size(), 0);
4152 for (unsigned i = 0; i < b->size; i++) {
4153 for (unsigned j = 0; j < weight.size(); ++j) {
4154 bucket_weight[j] += carg->weight_set[j].weights[i];
4155 }
4156 }
4157 choose_args_adjust_item_weight(cct, cmap, b->id, bucket_weight, nullptr);
4158 }
4159 return changed;
4160 }
4161
4162 int CrushWrapper::choose_args_adjust_item_weight(
4163 CephContext *cct,
4164 crush_choose_arg_map cmap,
4165 int id,
4166 const vector<int>& weight,
4167 ostream *ss)
4168 {
4169 ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl;
4170 int changed = 0;
4171 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
4172 crush_bucket *b = crush->buckets[bidx];
4173 if (b == nullptr) {
4174 continue;
4175 }
4176 changed += _choose_args_adjust_item_weight_in_bucket(
4177 cct, cmap, b->id, id, weight, ss);
4178 }
4179 if (!changed) {
4180 if (ss)
4181 *ss << "item " << id << " not found in crush map";
4182 return -ENOENT;
4183 }
4184 return changed;
4185 }