]> git.proxmox.com Git - ceph.git/blob - ceph/src/crush/CrushWrapper.cc
import ceph 15.2.13
[ceph.git] / ceph / src / crush / CrushWrapper.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "osd/osd_types.h"
5 #include "common/debug.h"
6 #include "common/Formatter.h"
7 #include "common/errno.h"
8 #include "common/TextTable.h"
9 #include "include/stringify.h"
10
11 #include "CrushWrapper.h"
12 #include "CrushTreeDumper.h"
13
14 #define dout_subsys ceph_subsys_crush
15
16 using std::cout;
17 using std::list;
18 using std::map;
19 using std::make_pair;
20 using std::ostream;
21 using std::ostringstream;
22 using std::pair;
23 using std::set;
24 using std::string;
25 using std::vector;
26
27 using ceph::bufferlist;
28 using ceph::decode;
29 using ceph::decode_nohead;
30 using ceph::encode;
31 using ceph::Formatter;
32
33 bool CrushWrapper::has_legacy_rule_ids() const
34 {
35 for (unsigned i=0; i<crush->max_rules; i++) {
36 crush_rule *r = crush->rules[i];
37 if (r &&
38 r->mask.ruleset != i) {
39 return true;
40 }
41 }
42 return false;
43 }
44
45 std::map<int, int> CrushWrapper::renumber_rules()
46 {
47 std::map<int, int> result;
48 for (unsigned i=0; i<crush->max_rules; i++) {
49 crush_rule *r = crush->rules[i];
50 if (r && r->mask.ruleset != i) {
51 result[r->mask.ruleset] = i;
52 r->mask.ruleset = i;
53 }
54 }
55 return result;
56 }
57
58 bool CrushWrapper::has_non_straw2_buckets() const
59 {
60 for (int i=0; i<crush->max_buckets; ++i) {
61 crush_bucket *b = crush->buckets[i];
62 if (!b)
63 continue;
64 if (b->alg != CRUSH_BUCKET_STRAW2)
65 return true;
66 }
67 return false;
68 }
69
70 bool CrushWrapper::has_v2_rules() const
71 {
72 for (unsigned i=0; i<crush->max_rules; i++) {
73 if (is_v2_rule(i)) {
74 return true;
75 }
76 }
77 return false;
78 }
79
80 bool CrushWrapper::is_v2_rule(unsigned ruleid) const
81 {
82 // check rule for use of indep or new SET_* rule steps
83 if (ruleid >= crush->max_rules)
84 return false;
85 crush_rule *r = crush->rules[ruleid];
86 if (!r)
87 return false;
88 for (unsigned j=0; j<r->len; j++) {
89 if (r->steps[j].op == CRUSH_RULE_CHOOSE_INDEP ||
90 r->steps[j].op == CRUSH_RULE_CHOOSELEAF_INDEP ||
91 r->steps[j].op == CRUSH_RULE_SET_CHOOSE_TRIES ||
92 r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_TRIES) {
93 return true;
94 }
95 }
96 return false;
97 }
98
99 bool CrushWrapper::has_v3_rules() const
100 {
101 for (unsigned i=0; i<crush->max_rules; i++) {
102 if (is_v3_rule(i)) {
103 return true;
104 }
105 }
106 return false;
107 }
108
109 bool CrushWrapper::is_v3_rule(unsigned ruleid) const
110 {
111 // check rule for use of SET_CHOOSELEAF_VARY_R step
112 if (ruleid >= crush->max_rules)
113 return false;
114 crush_rule *r = crush->rules[ruleid];
115 if (!r)
116 return false;
117 for (unsigned j=0; j<r->len; j++) {
118 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_VARY_R) {
119 return true;
120 }
121 }
122 return false;
123 }
124
125 bool CrushWrapper::has_v4_buckets() const
126 {
127 for (int i=0; i<crush->max_buckets; ++i) {
128 crush_bucket *b = crush->buckets[i];
129 if (!b)
130 continue;
131 if (b->alg == CRUSH_BUCKET_STRAW2)
132 return true;
133 }
134 return false;
135 }
136
137 bool CrushWrapper::has_v5_rules() const
138 {
139 for (unsigned i=0; i<crush->max_rules; i++) {
140 if (is_v5_rule(i)) {
141 return true;
142 }
143 }
144 return false;
145 }
146
147 bool CrushWrapper::is_v5_rule(unsigned ruleid) const
148 {
149 // check rule for use of SET_CHOOSELEAF_STABLE step
150 if (ruleid >= crush->max_rules)
151 return false;
152 crush_rule *r = crush->rules[ruleid];
153 if (!r)
154 return false;
155 for (unsigned j=0; j<r->len; j++) {
156 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_STABLE) {
157 return true;
158 }
159 }
160 return false;
161 }
162
163 bool CrushWrapper::has_choose_args() const
164 {
165 return !choose_args.empty();
166 }
167
168 bool CrushWrapper::has_incompat_choose_args() const
169 {
170 if (choose_args.empty())
171 return false;
172 if (choose_args.size() > 1)
173 return true;
174 if (choose_args.begin()->first != DEFAULT_CHOOSE_ARGS)
175 return true;
176 crush_choose_arg_map arg_map = choose_args.begin()->second;
177 for (__u32 i = 0; i < arg_map.size; i++) {
178 crush_choose_arg *arg = &arg_map.args[i];
179 if (arg->weight_set_positions == 0 &&
180 arg->ids_size == 0)
181 continue;
182 if (arg->weight_set_positions != 1)
183 return true;
184 if (arg->ids_size != 0)
185 return true;
186 }
187 return false;
188 }
189
190 int CrushWrapper::split_id_class(int i, int *idout, int *classout) const
191 {
192 if (!item_exists(i))
193 return -EINVAL;
194 string name = get_item_name(i);
195 size_t pos = name.find("~");
196 if (pos == string::npos) {
197 *idout = i;
198 *classout = -1;
199 return 0;
200 }
201 string name_no_class = name.substr(0, pos);
202 if (!name_exists(name_no_class))
203 return -ENOENT;
204 string class_name = name.substr(pos + 1);
205 if (!class_exists(class_name))
206 return -ENOENT;
207 *idout = get_item_id(name_no_class);
208 *classout = get_class_id(class_name);
209 return 0;
210 }
211
212 int CrushWrapper::can_rename_item(const string& srcname,
213 const string& dstname,
214 ostream *ss) const
215 {
216 if (name_exists(srcname)) {
217 if (name_exists(dstname)) {
218 *ss << "dstname = '" << dstname << "' already exists";
219 return -EEXIST;
220 }
221 if (is_valid_crush_name(dstname)) {
222 return 0;
223 } else {
224 *ss << "dstname = '" << dstname << "' does not match [-_.0-9a-zA-Z]+";
225 return -EINVAL;
226 }
227 } else {
228 if (name_exists(dstname)) {
229 *ss << "srcname = '" << srcname << "' does not exist "
230 << "and dstname = '" << dstname << "' already exists";
231 return -EALREADY;
232 } else {
233 *ss << "srcname = '" << srcname << "' does not exist";
234 return -ENOENT;
235 }
236 }
237 }
238
239 int CrushWrapper::rename_item(const string& srcname,
240 const string& dstname,
241 ostream *ss)
242 {
243 int ret = can_rename_item(srcname, dstname, ss);
244 if (ret < 0)
245 return ret;
246 int oldid = get_item_id(srcname);
247 return set_item_name(oldid, dstname);
248 }
249
250 int CrushWrapper::can_rename_bucket(const string& srcname,
251 const string& dstname,
252 ostream *ss) const
253 {
254 int ret = can_rename_item(srcname, dstname, ss);
255 if (ret)
256 return ret;
257 int srcid = get_item_id(srcname);
258 if (srcid >= 0) {
259 *ss << "srcname = '" << srcname << "' is not a bucket "
260 << "because its id = " << srcid << " is >= 0";
261 return -ENOTDIR;
262 }
263 return 0;
264 }
265
266 int CrushWrapper::rename_bucket(const string& srcname,
267 const string& dstname,
268 ostream *ss)
269 {
270 int ret = can_rename_bucket(srcname, dstname, ss);
271 if (ret < 0)
272 return ret;
273 int oldid = get_item_id(srcname);
274 return set_item_name(oldid, dstname);
275 }
276
277 int CrushWrapper::rename_rule(const string& srcname,
278 const string& dstname,
279 ostream *ss)
280 {
281 if (!rule_exists(srcname)) {
282 if (ss) {
283 *ss << "source rule name '" << srcname << "' does not exist";
284 }
285 return -ENOENT;
286 }
287 if (rule_exists(dstname)) {
288 if (ss) {
289 *ss << "destination rule name '" << dstname << "' already exists";
290 }
291 return -EEXIST;
292 }
293 int rule_id = get_rule_id(srcname);
294 auto it = rule_name_map.find(rule_id);
295 ceph_assert(it != rule_name_map.end());
296 it->second = dstname;
297 if (have_rmaps) {
298 rule_name_rmap.erase(srcname);
299 rule_name_rmap[dstname] = rule_id;
300 }
301 return 0;
302 }
303
304 void CrushWrapper::find_takes(set<int> *roots) const
305 {
306 for (unsigned i=0; i<crush->max_rules; i++) {
307 crush_rule *r = crush->rules[i];
308 if (!r)
309 continue;
310 for (unsigned j=0; j<r->len; j++) {
311 if (r->steps[j].op == CRUSH_RULE_TAKE)
312 roots->insert(r->steps[j].arg1);
313 }
314 }
315 }
316
317 void CrushWrapper::find_takes_by_rule(int rule, set<int> *roots) const
318 {
319 if (rule < 0 || rule >= (int)crush->max_rules)
320 return;
321 crush_rule *r = crush->rules[rule];
322 if (!r)
323 return;
324 for (unsigned i = 0; i < r->len; i++) {
325 if (r->steps[i].op == CRUSH_RULE_TAKE)
326 roots->insert(r->steps[i].arg1);
327 }
328 }
329
330 void CrushWrapper::find_roots(set<int> *roots) const
331 {
332 for (int i = 0; i < crush->max_buckets; i++) {
333 if (!crush->buckets[i])
334 continue;
335 crush_bucket *b = crush->buckets[i];
336 if (!_search_item_exists(b->id))
337 roots->insert(b->id);
338 }
339 }
340
341 bool CrushWrapper::subtree_contains(int root, int item) const
342 {
343 if (root == item)
344 return true;
345
346 if (root >= 0)
347 return false; // root is a leaf
348
349 const crush_bucket *b = get_bucket(root);
350 if (IS_ERR(b))
351 return false;
352
353 for (unsigned j=0; j<b->size; j++) {
354 if (subtree_contains(b->items[j], item))
355 return true;
356 }
357 return false;
358 }
359
360 bool CrushWrapper::_maybe_remove_last_instance(CephContext *cct, int item, bool unlink_only)
361 {
362 // last instance?
363 if (_search_item_exists(item)) {
364 return false;
365 }
366 if (item < 0 && _bucket_is_in_use(item)) {
367 return false;
368 }
369
370 if (item < 0 && !unlink_only) {
371 crush_bucket *t = get_bucket(item);
372 ldout(cct, 5) << "_maybe_remove_last_instance removing bucket " << item << dendl;
373 crush_remove_bucket(crush, t);
374 if (class_bucket.count(item) != 0)
375 class_bucket.erase(item);
376 class_remove_item(item);
377 update_choose_args(cct);
378 }
379 if ((item >= 0 || !unlink_only) && name_map.count(item)) {
380 ldout(cct, 5) << "_maybe_remove_last_instance removing name for item " << item << dendl;
381 name_map.erase(item);
382 have_rmaps = false;
383 if (item >= 0 && !unlink_only) {
384 class_remove_item(item);
385 }
386 }
387 rebuild_roots_with_classes(cct);
388 return true;
389 }
390
391 int CrushWrapper::remove_root(CephContext *cct, int item)
392 {
393 crush_bucket *b = get_bucket(item);
394 if (IS_ERR(b)) {
395 // should be idempotent
396 // e.g.: we use 'crush link' to link same host into
397 // different roots, which as a result can cause different
398 // shadow trees reference same hosts too. This means
399 // we may need to destory the same buckets(hosts, racks, etc.)
400 // multiple times during rebuilding all shadow trees.
401 return 0;
402 }
403
404 for (unsigned n = 0; n < b->size; n++) {
405 if (b->items[n] >= 0)
406 continue;
407 int r = remove_root(cct, b->items[n]);
408 if (r < 0)
409 return r;
410 }
411
412 crush_remove_bucket(crush, b);
413 if (name_map.count(item) != 0) {
414 name_map.erase(item);
415 have_rmaps = false;
416 }
417 if (class_bucket.count(item) != 0)
418 class_bucket.erase(item);
419 class_remove_item(item);
420 update_choose_args(cct);
421 return 0;
422 }
423
// Brings every choose_args map back in line with the current bucket table.
//
// For each map and each bucket slot:
//  - if the bucket is gone or is not straw2, its ids and weight_sets are
//    freed and reset;
//  - slots with no weight_set, or with an unexpected number of positions,
//    are skipped (the latter is logged as an error);
//  - any weight_set whose length differs from the bucket's size is
//    reallocated to the bucket's size, keeping the leading weights and
//    zero-filling the rest.
//
// @param cct  logging context; all logging is skipped when it is null
void CrushWrapper::update_choose_args(CephContext *cct)
{
  for (auto& i : choose_args) {
    crush_choose_arg_map &arg_map = i.second;
    assert(arg_map.size == (unsigned)crush->max_buckets);
    unsigned positions = get_choose_args_positions(arg_map);
    for (int j = 0; j < crush->max_buckets; ++j) {
      crush_bucket *b = crush->buckets[j];
      assert(j < (int)arg_map.size);
      auto& carg = arg_map.args[j];
      // strip out choose_args for any buckets that no longer exist
      // (or that are not straw2)
      if (!b || b->alg != CRUSH_BUCKET_STRAW2) {
        if (carg.ids) {
          if (cct)
            ldout(cct,10) << __func__ << " removing " << i.first << " bucket "
                          << (-1-j) << " ids" << dendl;
          free(carg.ids);
          carg.ids = 0;
          carg.ids_size = 0;
        }
        if (carg.weight_set) {
          if (cct)
            ldout(cct,10) << __func__ << " removing " << i.first << " bucket "
                          << (-1-j) << " weight_sets" << dendl;
          // each position owns its own weights array
          for (unsigned p = 0; p < carg.weight_set_positions; ++p) {
            free(carg.weight_set[p].weights);
          }
          free(carg.weight_set);
          carg.weight_set = 0;
          carg.weight_set_positions = 0;
        }
        continue;
      }
      if (carg.weight_set_positions == 0) {
        continue;	// skip it
      }
      if (carg.weight_set_positions != positions) {
        if (cct)
          lderr(cct) << __func__ << " " << i.first << " bucket "
                     << (-1-j) << " positions " << carg.weight_set_positions
                     << " -> " << positions << dendl;
        continue;	// wth... skip!
      }
      // mis-sized weight_sets?  this shouldn't ever happen.
      for (unsigned p = 0; p < positions; ++p) {
        if (carg.weight_set[p].size != b->size) {
          if (cct)
            lderr(cct) << __func__ << " fixing " << i.first << " bucket "
                       << (-1-j) << " position " << p
                       << " size " << carg.weight_set[p].size << " -> "
                       << b->size << dendl;
          // keep a copy of the old descriptor so its weights can be
          // carried over and then released
          auto old_ws = carg.weight_set[p];
          carg.weight_set[p].size = b->size;
          carg.weight_set[p].weights = (__u32*)calloc(b->size, sizeof(__u32));
          auto max = std::min<unsigned>(old_ws.size, b->size);
          for (unsigned k = 0; k < max; ++k) {
            carg.weight_set[p].weights[k] = old_ws.weights[k];
          }
          free(old_ws.weights);
        }
      }
    }
  }
}
488
489 int CrushWrapper::remove_item(CephContext *cct, int item, bool unlink_only)
490 {
491 ldout(cct, 5) << "remove_item " << item
492 << (unlink_only ? " unlink_only":"") << dendl;
493
494 int ret = -ENOENT;
495
496 if (item < 0 && !unlink_only) {
497 crush_bucket *t = get_bucket(item);
498 if (IS_ERR(t)) {
499 ldout(cct, 1) << "remove_item bucket " << item << " does not exist"
500 << dendl;
501 return -ENOENT;
502 }
503
504 if (t->size) {
505 ldout(cct, 1) << "remove_item bucket " << item << " has " << t->size
506 << " items, not empty" << dendl;
507 return -ENOTEMPTY;
508 }
509 if (_bucket_is_in_use(item)) {
510 return -EBUSY;
511 }
512 }
513
514 for (int i = 0; i < crush->max_buckets; i++) {
515 if (!crush->buckets[i])
516 continue;
517 crush_bucket *b = crush->buckets[i];
518
519 for (unsigned i=0; i<b->size; ++i) {
520 int id = b->items[i];
521 if (id == item) {
522 ldout(cct, 5) << "remove_item removing item " << item
523 << " from bucket " << b->id << dendl;
524 adjust_item_weight_in_bucket(cct, item, 0, b->id, true);
525 bucket_remove_item(b, item);
526 ret = 0;
527 }
528 }
529 }
530
531 if (_maybe_remove_last_instance(cct, item, unlink_only))
532 ret = 0;
533
534 return ret;
535 }
536
537 bool CrushWrapper::_search_item_exists(int item) const
538 {
539 for (int i = 0; i < crush->max_buckets; i++) {
540 if (!crush->buckets[i])
541 continue;
542 crush_bucket *b = crush->buckets[i];
543 for (unsigned j=0; j<b->size; ++j) {
544 if (b->items[j] == item)
545 return true;
546 }
547 }
548 return false;
549 }
550
551 bool CrushWrapper::_bucket_is_in_use(int item)
552 {
553 for (auto &i : class_bucket)
554 for (auto &j : i.second)
555 if (j.second == item)
556 return true;
557 for (unsigned i = 0; i < crush->max_rules; ++i) {
558 crush_rule *r = crush->rules[i];
559 if (!r)
560 continue;
561 for (unsigned j = 0; j < r->len; ++j) {
562 if (r->steps[j].op == CRUSH_RULE_TAKE) {
563 int step_item = r->steps[j].arg1;
564 int original_item;
565 int c;
566 int res = split_id_class(step_item, &original_item, &c);
567 if (res < 0)
568 return false;
569 if (step_item == item || original_item == item)
570 return true;
571 }
572 }
573 }
574 return false;
575 }
576
577 int CrushWrapper::_remove_item_under(
578 CephContext *cct, int item, int ancestor, bool unlink_only)
579 {
580 ldout(cct, 5) << "_remove_item_under " << item << " under " << ancestor
581 << (unlink_only ? " unlink_only":"") << dendl;
582
583 if (ancestor >= 0) {
584 return -EINVAL;
585 }
586
587 if (!bucket_exists(ancestor))
588 return -EINVAL;
589
590 int ret = -ENOENT;
591
592 crush_bucket *b = get_bucket(ancestor);
593 for (unsigned i=0; i<b->size; ++i) {
594 int id = b->items[i];
595 if (id == item) {
596 ldout(cct, 5) << "_remove_item_under removing item " << item
597 << " from bucket " << b->id << dendl;
598 adjust_item_weight_in_bucket(cct, item, 0, b->id, true);
599 bucket_remove_item(b, item);
600 ret = 0;
601 } else if (id < 0) {
602 int r = remove_item_under(cct, item, id, unlink_only);
603 if (r == 0)
604 ret = 0;
605 }
606 }
607 return ret;
608 }
609
610 int CrushWrapper::remove_item_under(
611 CephContext *cct, int item, int ancestor, bool unlink_only)
612 {
613 ldout(cct, 5) << "remove_item_under " << item << " under " << ancestor
614 << (unlink_only ? " unlink_only":"") << dendl;
615
616 if (!unlink_only && _bucket_is_in_use(item)) {
617 return -EBUSY;
618 }
619
620 int ret = _remove_item_under(cct, item, ancestor, unlink_only);
621 if (ret < 0)
622 return ret;
623
624 if (item < 0 && !unlink_only) {
625 crush_bucket *t = get_bucket(item);
626 if (IS_ERR(t)) {
627 ldout(cct, 1) << "remove_item_under bucket " << item
628 << " does not exist" << dendl;
629 return -ENOENT;
630 }
631
632 if (t->size) {
633 ldout(cct, 1) << "remove_item_under bucket " << item << " has " << t->size
634 << " items, not empty" << dendl;
635 return -ENOTEMPTY;
636 }
637 }
638
639 if (_maybe_remove_last_instance(cct, item, unlink_only))
640 ret = 0;
641
642 return ret;
643 }
644
645 int CrushWrapper::get_common_ancestor_distance(CephContext *cct, int id,
646 const std::multimap<string,string>& loc) const
647 {
648 ldout(cct, 5) << __func__ << " " << id << " " << loc << dendl;
649 if (!item_exists(id))
650 return -ENOENT;
651 map<string,string> id_loc = get_full_location(id);
652 ldout(cct, 20) << " id is at " << id_loc << dendl;
653
654 for (map<int,string>::const_iterator p = type_map.begin();
655 p != type_map.end();
656 ++p) {
657 map<string,string>::iterator ip = id_loc.find(p->second);
658 if (ip == id_loc.end())
659 continue;
660 for (std::multimap<string,string>::const_iterator q = loc.find(p->second);
661 q != loc.end();
662 ++q) {
663 if (q->first != p->second)
664 break;
665 if (q->second == ip->second)
666 return p->first;
667 }
668 }
669 return -ERANGE;
670 }
671
672 int CrushWrapper::parse_loc_map(const std::vector<string>& args,
673 std::map<string,string> *ploc)
674 {
675 ploc->clear();
676 for (unsigned i = 0; i < args.size(); ++i) {
677 const char *s = args[i].c_str();
678 const char *pos = strchr(s, '=');
679 if (!pos)
680 return -EINVAL;
681 string key(s, 0, pos-s);
682 string value(pos+1);
683 if (value.length())
684 (*ploc)[key] = value;
685 else
686 return -EINVAL;
687 }
688 return 0;
689 }
690
691 int CrushWrapper::parse_loc_multimap(const std::vector<string>& args,
692 std::multimap<string,string> *ploc)
693 {
694 ploc->clear();
695 for (unsigned i = 0; i < args.size(); ++i) {
696 const char *s = args[i].c_str();
697 const char *pos = strchr(s, '=');
698 if (!pos)
699 return -EINVAL;
700 string key(s, 0, pos-s);
701 string value(pos+1);
702 if (value.length())
703 ploc->insert(make_pair(key, value));
704 else
705 return -EINVAL;
706 }
707 return 0;
708 }
709
710 bool CrushWrapper::check_item_loc(CephContext *cct, int item, const map<string,string>& loc,
711 int *weight)
712 {
713 ldout(cct, 5) << "check_item_loc item " << item << " loc " << loc << dendl;
714
715 for (map<int,string>::const_iterator p = type_map.begin(); p != type_map.end(); ++p) {
716 // ignore device
717 if (p->first == 0)
718 continue;
719
720 // ignore types that aren't specified in loc
721 map<string,string>::const_iterator q = loc.find(p->second);
722 if (q == loc.end()) {
723 ldout(cct, 2) << "warning: did not specify location for '" << p->second << "' level (levels are "
724 << type_map << ")" << dendl;
725 continue;
726 }
727
728 if (!name_exists(q->second)) {
729 ldout(cct, 5) << "check_item_loc bucket " << q->second << " dne" << dendl;
730 return false;
731 }
732
733 int id = get_item_id(q->second);
734 if (id >= 0) {
735 ldout(cct, 5) << "check_item_loc requested " << q->second << " for type " << p->second
736 << " is a device, not bucket" << dendl;
737 return false;
738 }
739
740 ceph_assert(bucket_exists(id));
741 crush_bucket *b = get_bucket(id);
742
743 // see if item exists in this bucket
744 for (unsigned j=0; j<b->size; j++) {
745 if (b->items[j] == item) {
746 ldout(cct, 2) << "check_item_loc " << item << " exists in bucket " << b->id << dendl;
747 if (weight)
748 *weight = crush_get_bucket_item_weight(b, j);
749 return true;
750 }
751 }
752 return false;
753 }
754
755 ldout(cct, 2) << __func__ << " item " << item << " loc " << loc << dendl;
756 return false;
757 }
758
759 map<string, string> CrushWrapper::get_full_location(int id) const
760 {
761 vector<pair<string, string> > full_location_ordered;
762 map<string,string> full_location;
763
764 get_full_location_ordered(id, full_location_ordered);
765
766 std::copy(full_location_ordered.begin(),
767 full_location_ordered.end(),
768 std::inserter(full_location, full_location.begin()));
769
770 return full_location;
771 }
772
773 int CrushWrapper::get_full_location(const string& name,
774 map<string,string> *ploc)
775 {
776 build_rmaps();
777 auto p = name_rmap.find(name);
778 if (p == name_rmap.end()) {
779 return -ENOENT;
780 }
781 *ploc = get_full_location(p->second);
782 return 0;
783 }
784
785 int CrushWrapper::get_full_location_ordered(int id, vector<pair<string, string> >& path) const
786 {
787 if (!item_exists(id))
788 return -ENOENT;
789 int cur = id;
790 int ret;
791 while (true) {
792 pair<string, string> parent_coord = get_immediate_parent(cur, &ret);
793 if (ret != 0)
794 break;
795 path.push_back(parent_coord);
796 cur = get_item_id(parent_coord.second);
797 }
798 return 0;
799 }
800
801 string CrushWrapper::get_full_location_ordered_string(int id) const
802 {
803 vector<pair<string, string> > full_location_ordered;
804 string full_location;
805 get_full_location_ordered(id, full_location_ordered);
806 reverse(begin(full_location_ordered), end(full_location_ordered));
807 for(auto i = full_location_ordered.begin(); i != full_location_ordered.end(); i++) {
808 full_location = full_location + i->first + "=" + i->second;
809 if (i != full_location_ordered.end() - 1) {
810 full_location = full_location + ",";
811 }
812 }
813 return full_location;
814 }
815
// Builds a map from type id to the `first` member of successive
// get_immediate_parent() results, walking upward from `id`.
//
// The keys are produced by counting up from the item's own type (clamped
// to 0 when get_bucket_type() is negative) to the highest id in type_map,
// one step per parent lookup.
// NOTE(review): the key is a running counter, not the parent's actual
// type, so this presumably assumes one ancestor per consecutive type id --
// confirm against callers.
map<int, string> CrushWrapper::get_parent_hierarchy(int id) const
{
  map<int,string> parent_hierarchy;
  pair<string, string> parent_coord = get_immediate_parent(id);
  int parent_id;

  // get the integer type for id and create a counter from there
  int type_counter = get_bucket_type(id);

  // if we get a negative type then we can assume that we have an OSD
  // change behavior in get_item_type FIXME
  if (type_counter < 0)
    type_counter = 0;

  // read the type map and get the id of the type with the largest ID
  int high_type = 0;
  if (!type_map.empty())
    high_type = type_map.rbegin()->first;

  parent_id = get_item_id(parent_coord.second);

  while (type_counter < high_type) {
    type_counter++;
    parent_hierarchy[ type_counter ] = parent_coord.first;

    // no further lookup is needed once the top type has been recorded
    if (type_counter < high_type){
      // get the coordinate information for the next parent
      parent_coord = get_immediate_parent(parent_id);
      parent_id = get_item_id(parent_coord.second);
    }
  }

  return parent_hierarchy;
}
850
851 int CrushWrapper::get_children(int id, list<int> *children) const
852 {
853 // leaf?
854 if (id >= 0) {
855 return 0;
856 }
857
858 auto *b = get_bucket(id);
859 if (IS_ERR(b)) {
860 return -ENOENT;
861 }
862
863 for (unsigned n=0; n<b->size; n++) {
864 children->push_back(b->items[n]);
865 }
866 return b->size;
867 }
868
869 int CrushWrapper::get_all_children(int id, set<int> *children) const
870 {
871 // leaf?
872 if (id >= 0) {
873 return 0;
874 }
875
876 auto *b = get_bucket(id);
877 if (IS_ERR(b)) {
878 return -ENOENT;
879 }
880
881 int c = 0;
882 for (unsigned n = 0; n < b->size; n++) {
883 children->insert(b->items[n]);
884 c++;
885 auto r = get_all_children(b->items[n], children);
886 if (r < 0)
887 return r;
888 c += r;
889 }
890 return c;
891 }
892
893 void CrushWrapper::get_children_of_type(int id,
894 int type,
895 vector<int> *children,
896 bool exclude_shadow) const
897 {
898 if (id >= 0) {
899 if (type == 0) {
900 // want leaf?
901 children->push_back(id);
902 }
903 return;
904 }
905 auto b = get_bucket(id);
906 if (IS_ERR(b)) {
907 return;
908 }
909 if (b->type < type) {
910 // give up
911 return;
912 } else if (b->type == type) {
913 if (!is_shadow_item(b->id) || !exclude_shadow) {
914 children->push_back(b->id);
915 }
916 return;
917 }
918 for (unsigned n = 0; n < b->size; n++) {
919 get_children_of_type(b->items[n], type, children, exclude_shadow);
920 }
921 }
922
923 int CrushWrapper::verify_upmap(CephContext *cct,
924 int rule_id,
925 int pool_size,
926 const vector<int>& up)
927 {
928 auto rule = get_rule(rule_id);
929 if (IS_ERR(rule) || !rule) {
930 lderr(cct) << __func__ << " rule " << rule_id << " does not exist"
931 << dendl;
932 return -ENOENT;
933 }
934 int root_bucket = 0;
935 int cursor = 0;
936 std::map<int, int> type_stack;
937 for (unsigned step = 0; step < rule->len; ++step) {
938 auto curstep = &rule->steps[step];
939 ldout(cct, 10) << __func__ << " step " << step << dendl;
940 switch (curstep->op) {
941 case CRUSH_RULE_TAKE:
942 {
943 root_bucket = curstep->arg1;
944 }
945 break;
946 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
947 case CRUSH_RULE_CHOOSELEAF_INDEP:
948 {
949 int numrep = curstep->arg1;
950 int type = curstep->arg2;
951 if (numrep <= 0)
952 numrep += pool_size;
953 type_stack.emplace(type, numrep);
954 if (type == 0) // osd
955 break;
956 map<int, set<int>> osds_by_parent; // parent_of_desired_type -> osds
957 for (auto osd : up) {
958 auto parent = get_parent_of_type(osd, type, rule_id);
959 if (parent < 0) {
960 osds_by_parent[parent].insert(osd);
961 } else {
962 ldout(cct, 1) << __func__ << " unable to get parent of osd." << osd
963 << ", skipping for now"
964 << dendl;
965 }
966 }
967 for (auto i : osds_by_parent) {
968 if (i.second.size() > 1) {
969 lderr(cct) << __func__ << " multiple osds " << i.second
970 << " come from same failure domain " << i.first
971 << dendl;
972 return -EINVAL;
973 }
974 }
975 }
976 break;
977
978 case CRUSH_RULE_CHOOSE_FIRSTN:
979 case CRUSH_RULE_CHOOSE_INDEP:
980 {
981 int numrep = curstep->arg1;
982 int type = curstep->arg2;
983 if (numrep <= 0)
984 numrep += pool_size;
985 type_stack.emplace(type, numrep);
986 if (type == 0) // osd
987 break;
988 set<int> parents_of_type;
989 for (auto osd : up) {
990 auto parent = get_parent_of_type(osd, type, rule_id);
991 if (parent < 0) {
992 parents_of_type.insert(parent);
993 } else {
994 ldout(cct, 1) << __func__ << " unable to get parent of osd." << osd
995 << ", skipping for now"
996 << dendl;
997 }
998 }
999 if ((int)parents_of_type.size() > numrep) {
1000 lderr(cct) << __func__ << " number of buckets "
1001 << parents_of_type.size() << " exceeds desired " << numrep
1002 << dendl;
1003 return -EINVAL;
1004 }
1005 }
1006 break;
1007
1008 case CRUSH_RULE_EMIT:
1009 {
1010 if (root_bucket < 0) {
1011 int num_osds = 1;
1012 for (auto &item : type_stack) {
1013 num_osds *= item.second;
1014 }
1015 // validate the osd's in subtree
1016 for (int c = 0; cursor < (int)up.size() && c < num_osds; ++cursor, ++c) {
1017 int osd = up[cursor];
1018 if (!subtree_contains(root_bucket, osd)) {
1019 lderr(cct) << __func__ << " osd " << osd << " not in bucket " << root_bucket << dendl;
1020 return -EINVAL;
1021 }
1022 }
1023 }
1024 type_stack.clear();
1025 root_bucket = 0;
1026 }
1027 break;
1028 default:
1029 // ignore
1030 break;
1031 }
1032 }
1033 return 0;
1034 }
1035
1036 int CrushWrapper::_get_leaves(int id, list<int> *leaves) const
1037 {
1038 ceph_assert(leaves);
1039
1040 // Already leaf?
1041 if (id >= 0) {
1042 leaves->push_back(id);
1043 return 0;
1044 }
1045
1046 auto b = get_bucket(id);
1047 if (IS_ERR(b)) {
1048 return -ENOENT;
1049 }
1050
1051 for (unsigned n = 0; n < b->size; n++) {
1052 if (b->items[n] >= 0) {
1053 leaves->push_back(b->items[n]);
1054 } else {
1055 // is a bucket, do recursive call
1056 int r = _get_leaves(b->items[n], leaves);
1057 if (r < 0) {
1058 return r;
1059 }
1060 }
1061 }
1062
1063 return 0; // all is well
1064 }
1065
1066 int CrushWrapper::get_leaves(const string &name, set<int> *leaves) const
1067 {
1068 ceph_assert(leaves);
1069 leaves->clear();
1070
1071 if (!name_exists(name)) {
1072 return -ENOENT;
1073 }
1074
1075 int id = get_item_id(name);
1076 if (id >= 0) {
1077 // already leaf
1078 leaves->insert(id);
1079 return 0;
1080 }
1081
1082 list<int> unordered;
1083 int r = _get_leaves(id, &unordered);
1084 if (r < 0) {
1085 return r;
1086 }
1087
1088 for (auto &p : unordered) {
1089 leaves->insert(p);
1090 }
1091
1092 return 0;
1093 }
1094
// Inserts `item` into the hierarchy at the location described by `loc`.
//
// Walking type_map from the lowest non-device type upward, each type named
// in `loc` either gets a freshly created bucket (holding the item or chain
// built so far) or, if the bucket already exists, has the item/chain linked
// into it with weight 0, which ends the walk.  The real weight is applied
// afterwards via adjust_item_weightf_in_loc().
//
// @param cct               logging context
// @param item              id of the item to insert
// @param weight            weight to give the item at its location
// @param name              name for the item; must be a valid crush name
//                          and, if already known, must map to `item`
// @param loc               typename -> bucketname
// @param init_weight_sets  forwarded to adjust_item_weightf_in_loc(), but
//                          only when item >= 0
// @return 0 on success; -EINVAL for an invalid name or loc, a non-bucket
//         or wrongly-typed target, an item already beneath the target, or
//         when no place to add the item was found; -EEXIST if `name`
//         belongs to another id; -ELOOP if linking would form a loop; or
//         the negative code from validate_weightf(), add_bucket() or
//         rebuild_roots_with_classes().
int CrushWrapper::insert_item(
  CephContext *cct, int item, float weight, string name,
  const map<string,string>& loc,  // typename -> bucketname
  bool init_weight_sets)
{
  ldout(cct, 5) << "insert_item item " << item << " weight " << weight
                << " name " << name << " loc " << loc << dendl;

  if (!is_valid_crush_name(name))
    return -EINVAL;

  if (!is_valid_crush_loc(cct, loc))
    return -EINVAL;

  int r = validate_weightf(weight);
  if (r < 0) {
    return r;
  }

  if (name_exists(name)) {
    if (get_item_id(name) != item) {
      ldout(cct, 10) << "device name '" << name << "' already exists as id "
                     << get_item_id(name) << dendl;
      return -EEXIST;
    }
  } else {
    set_item_name(item, name);
  }

  // id of the item or most recently created bucket that still needs a
  // parent
  int cur = item;

  // Create any location buckets that do not exist yet and link the child
  // into its location with weight 0; see the insert_item declaration in
  // CrushWrapper.h for more detail.
  for (auto p = type_map.begin(); p != type_map.end(); ++p) {
    // ignore device type
    if (p->first == 0)
      continue;

    // skip types that are unspecified
    map<string,string>::const_iterator q = loc.find(p->second);
    if (q == loc.end()) {
      ldout(cct, 2) << "warning: did not specify location for '"
                    << p->second << "' level (levels are "
                    << type_map << ")" << dendl;
      continue;
    }

    if (!name_exists(q->second)) {
      ldout(cct, 5) << "insert_item creating bucket " << q->second << dendl;
      int empty = 0, newid;
      // note: this `r` shadows the function-level `r`
      int r = add_bucket(0, 0,
                         CRUSH_HASH_DEFAULT, p->first, 1, &cur, &empty, &newid);
      if (r < 0) {
        ldout(cct, 1) << "add_bucket failure error: " << cpp_strerror(r)
                      << dendl;
        return r;
      }
      set_item_name(newid, q->second);

      // the new bucket is now the thing that needs a parent
      cur = newid;
      continue;
    }

    // add to an existing bucket
    int id = get_item_id(q->second);
    if (!bucket_exists(id)) {
      ldout(cct, 1) << "insert_item doesn't have bucket " << id << dendl;
      return -EINVAL;
    }

    // check that we aren't creating a cycle.
    if (subtree_contains(id, cur)) {
      ldout(cct, 1) << "insert_item item " << cur << " already exists beneath "
                    << id << dendl;
      return -EINVAL;
    }

    // we have done sanity check above
    crush_bucket *b = get_bucket(id);

    if (p->first != b->type) {
      ldout(cct, 1) << "insert_item existing bucket has type "
                    << "'" << type_map[b->type] << "' != "
                    << "'" << type_map[p->first] << "'" << dendl;
      return -EINVAL;
    }

    // are we forming a loop?
    if (subtree_contains(cur, b->id)) {
      ldout(cct, 1) << "insert_item " << cur << " already contains " << b->id
                    << "; cannot form loop" << dendl;
      return -ELOOP;
    }

    ldout(cct, 5) << "insert_item adding " << cur << " weight " << weight
                  << " to bucket " << id << dendl;
    [[maybe_unused]] int r = bucket_add_item(b, cur, 0);
    ceph_assert(!r);
    // linked into an existing bucket: nothing above it needs creating
    break;
  }

  // adjust the item's weight in location
  if (adjust_item_weightf_in_loc(cct, item, weight, loc,
                                 item >= 0 && init_weight_sets) > 0) {
    if (item >= crush->max_devices) {
      crush->max_devices = item + 1;
      ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices
                    << dendl;
    }
    r = rebuild_roots_with_classes(cct);
    if (r < 0) {
      ldout(cct, 0) << __func__ << " unable to rebuild roots with classes: "
                    << cpp_strerror(r) << dendl;
      return r;
    }
    return 0;
  }

  ldout(cct, 1) << "error: didn't find anywhere to add item " << item
                << " in " << loc << dendl;
  return -EINVAL;
}
1218
1219
1220 int CrushWrapper::move_bucket(
1221 CephContext *cct, int id, const map<string,string>& loc)
1222 {
1223 // sorry this only works for buckets
1224 if (id >= 0)
1225 return -EINVAL;
1226
1227 if (!item_exists(id))
1228 return -ENOENT;
1229
1230 // get the name of the bucket we are trying to move for later
1231 string id_name = get_item_name(id);
1232
1233 // detach the bucket
1234 int bucket_weight = detach_bucket(cct, id);
1235
1236 // insert the bucket back into the hierarchy
1237 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc,
1238 false);
1239 }
1240
1241 int CrushWrapper::detach_bucket(CephContext *cct, int item)
1242 {
1243 if (!crush)
1244 return (-EINVAL);
1245
1246 if (item >= 0)
1247 return (-EINVAL);
1248
1249 // check that the bucket that we want to detach exists
1250 ceph_assert(bucket_exists(item));
1251
1252 // get the bucket's weight
1253 crush_bucket *b = get_bucket(item);
1254 unsigned bucket_weight = b->weight;
1255
1256 // get where the bucket is located
1257 pair<string, string> bucket_location = get_immediate_parent(item);
1258
1259 // get the id of the parent bucket
1260 int parent_id = get_item_id(bucket_location.second);
1261
1262 // get the parent bucket
1263 crush_bucket *parent_bucket = get_bucket(parent_id);
1264
1265 if (!IS_ERR(parent_bucket)) {
1266 // zero out the bucket weight
1267 adjust_item_weight_in_bucket(cct, item, 0, parent_bucket->id, true);
1268
1269 // remove the bucket from the parent
1270 bucket_remove_item(parent_bucket, item);
1271 } else if (PTR_ERR(parent_bucket) != -ENOENT) {
1272 return PTR_ERR(parent_bucket);
1273 }
1274
1275 // check that we're happy
1276 int test_weight = 0;
1277 map<string,string> test_location;
1278 test_location[ bucket_location.first ] = (bucket_location.second);
1279
1280 bool successful_detach = !(check_item_loc(cct, item, test_location,
1281 &test_weight));
1282 ceph_assert(successful_detach);
1283 ceph_assert(test_weight == 0);
1284
1285 return bucket_weight;
1286 }
1287
1288 bool CrushWrapper::is_parent_of(int child, int p) const
1289 {
1290 int parent = 0;
1291 while (!get_immediate_parent_id(child, &parent)) {
1292 if (parent == p) {
1293 return true;
1294 }
1295 child = parent;
1296 }
1297 return false;
1298 }
1299
1300 int CrushWrapper::swap_bucket(CephContext *cct, int src, int dst)
1301 {
1302 if (src >= 0 || dst >= 0)
1303 return -EINVAL;
1304 if (!item_exists(src) || !item_exists(dst))
1305 return -EINVAL;
1306 crush_bucket *a = get_bucket(src);
1307 crush_bucket *b = get_bucket(dst);
1308 if (is_parent_of(a->id, b->id) || is_parent_of(b->id, a->id)) {
1309 return -EINVAL;
1310 }
1311 unsigned aw = a->weight;
1312 unsigned bw = b->weight;
1313
1314 // swap weights
1315 adjust_item_weight(cct, a->id, bw);
1316 adjust_item_weight(cct, b->id, aw);
1317
1318 // swap items
1319 map<int,unsigned> tmp;
1320 unsigned as = a->size;
1321 unsigned bs = b->size;
1322 for (unsigned i = 0; i < as; ++i) {
1323 int item = a->items[0];
1324 int itemw = crush_get_bucket_item_weight(a, 0);
1325 tmp[item] = itemw;
1326 bucket_remove_item(a, item);
1327 }
1328 ceph_assert(a->size == 0);
1329 ceph_assert(b->size == bs);
1330 for (unsigned i = 0; i < bs; ++i) {
1331 int item = b->items[0];
1332 int itemw = crush_get_bucket_item_weight(b, 0);
1333 bucket_remove_item(b, item);
1334 bucket_add_item(a, item, itemw);
1335 }
1336 ceph_assert(a->size == bs);
1337 ceph_assert(b->size == 0);
1338 for (auto t : tmp) {
1339 bucket_add_item(b, t.first, t.second);
1340 }
1341 ceph_assert(a->size == bs);
1342 ceph_assert(b->size == as);
1343
1344 // swap names
1345 swap_names(src, dst);
1346 return rebuild_roots_with_classes(cct);
1347 }
1348
1349 int CrushWrapper::link_bucket(
1350 CephContext *cct, int id, const map<string,string>& loc)
1351 {
1352 // sorry this only works for buckets
1353 if (id >= 0)
1354 return -EINVAL;
1355
1356 if (!item_exists(id))
1357 return -ENOENT;
1358
1359 // get the name of the bucket we are trying to move for later
1360 string id_name = get_item_name(id);
1361
1362 crush_bucket *b = get_bucket(id);
1363 unsigned bucket_weight = b->weight;
1364
1365 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
1366 }
1367
1368 int CrushWrapper::create_or_move_item(
1369 CephContext *cct, int item, float weight, string name,
1370 const map<string,string>& loc, // typename -> bucketname
1371 bool init_weight_sets)
1372 {
1373 int ret = 0;
1374 int old_iweight;
1375
1376 if (!is_valid_crush_name(name))
1377 return -EINVAL;
1378
1379 if (check_item_loc(cct, item, loc, &old_iweight)) {
1380 ldout(cct, 5) << "create_or_move_item " << item << " already at " << loc
1381 << dendl;
1382 } else {
1383 if (_search_item_exists(item)) {
1384 weight = get_item_weightf(item);
1385 ldout(cct, 10) << "create_or_move_item " << item
1386 << " exists with weight " << weight << dendl;
1387 remove_item(cct, item, true);
1388 }
1389 ldout(cct, 5) << "create_or_move_item adding " << item
1390 << " weight " << weight
1391 << " at " << loc << dendl;
1392 ret = insert_item(cct, item, weight, name, loc,
1393 item >= 0 && init_weight_sets);
1394 if (ret == 0)
1395 ret = 1; // changed
1396 }
1397 return ret;
1398 }
1399
1400 int CrushWrapper::update_item(
1401 CephContext *cct, int item, float weight, string name,
1402 const map<string,string>& loc) // typename -> bucketname
1403 {
1404 ldout(cct, 5) << "update_item item " << item << " weight " << weight
1405 << " name " << name << " loc " << loc << dendl;
1406 int ret = 0;
1407
1408 if (!is_valid_crush_name(name))
1409 return -EINVAL;
1410
1411 if (!is_valid_crush_loc(cct, loc))
1412 return -EINVAL;
1413
1414 ret = validate_weightf(weight);
1415 if (ret < 0) {
1416 return ret;
1417 }
1418
1419 // compare quantized (fixed-point integer) weights!
1420 int iweight = (int)(weight * (float)0x10000);
1421 int old_iweight;
1422 if (check_item_loc(cct, item, loc, &old_iweight)) {
1423 ldout(cct, 5) << "update_item " << item << " already at " << loc << dendl;
1424 if (old_iweight != iweight) {
1425 ldout(cct, 5) << "update_item " << item << " adjusting weight "
1426 << ((float)old_iweight/(float)0x10000) << " -> " << weight
1427 << dendl;
1428 adjust_item_weight_in_loc(cct, item, iweight, loc);
1429 ret = rebuild_roots_with_classes(cct);
1430 if (ret < 0) {
1431 ldout(cct, 0) << __func__ << " unable to rebuild roots with classes: "
1432 << cpp_strerror(ret) << dendl;
1433 return ret;
1434 }
1435 ret = 1;
1436 }
1437 if (get_item_name(item) != name) {
1438 ldout(cct, 5) << "update_item setting " << item << " name to " << name
1439 << dendl;
1440 set_item_name(item, name);
1441 ret = 1;
1442 }
1443 } else {
1444 if (item_exists(item)) {
1445 remove_item(cct, item, true);
1446 }
1447 ldout(cct, 5) << "update_item adding " << item << " weight " << weight
1448 << " at " << loc << dendl;
1449 ret = insert_item(cct, item, weight, name, loc);
1450 if (ret == 0)
1451 ret = 1; // changed
1452 }
1453 return ret;
1454 }
1455
1456 int CrushWrapper::get_item_weight(int id) const
1457 {
1458 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1459 crush_bucket *b = crush->buckets[bidx];
1460 if (b == NULL)
1461 continue;
1462 if (b->id == id)
1463 return b->weight;
1464 for (unsigned i = 0; i < b->size; i++)
1465 if (b->items[i] == id)
1466 return crush_get_bucket_item_weight(b, i);
1467 }
1468 return -ENOENT;
1469 }
1470
1471 int CrushWrapper::get_item_weight_in_loc(int id, const map<string,string> &loc)
1472 {
1473 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
1474
1475 int bid = get_item_id(l->second);
1476 if (!bucket_exists(bid))
1477 continue;
1478 crush_bucket *b = get_bucket(bid);
1479 for (unsigned int i = 0; i < b->size; i++) {
1480 if (b->items[i] == id) {
1481 return crush_get_bucket_item_weight(b, i);
1482 }
1483 }
1484 }
1485 return -ENOENT;
1486 }
1487
1488 int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight,
1489 bool update_weight_sets)
1490 {
1491 ldout(cct, 5) << __func__ << " " << id << " weight " << weight
1492 << " update_weight_sets=" << (int)update_weight_sets
1493 << dendl;
1494 int changed = 0;
1495 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1496 if (!crush->buckets[bidx]) {
1497 continue;
1498 }
1499 int r = adjust_item_weight_in_bucket(cct, id, weight, -1-bidx,
1500 update_weight_sets);
1501 if (r > 0) {
1502 ++changed;
1503 }
1504 }
1505 if (!changed) {
1506 return -ENOENT;
1507 }
1508 return changed;
1509 }
1510
1511 int CrushWrapper::adjust_item_weight_in_bucket(
1512 CephContext *cct, int id, int weight,
1513 int bucket_id,
1514 bool update_weight_sets)
1515 {
1516 ldout(cct, 5) << __func__ << " " << id << " weight " << weight
1517 << " in bucket " << bucket_id
1518 << " update_weight_sets=" << (int)update_weight_sets
1519 << dendl;
1520 int changed = 0;
1521 if (!bucket_exists(bucket_id)) {
1522 return -ENOENT;
1523 }
1524 crush_bucket *b = get_bucket(bucket_id);
1525 for (unsigned int i = 0; i < b->size; i++) {
1526 if (b->items[i] == id) {
1527 int diff = bucket_adjust_item_weight(cct, b, id, weight,
1528 update_weight_sets);
1529 ldout(cct, 5) << __func__ << " " << id << " diff " << diff
1530 << " in bucket " << bucket_id << dendl;
1531 adjust_item_weight(cct, bucket_id, b->weight, false);
1532 changed++;
1533 }
1534 }
1535 // update weight-sets so they continue to sum
1536 for (auto& p : choose_args) {
1537 auto &cmap = p.second;
1538 if (!cmap.args) {
1539 continue;
1540 }
1541 crush_choose_arg *arg = &cmap.args[-1 - bucket_id];
1542 if (!arg->weight_set) {
1543 continue;
1544 }
1545 ceph_assert(arg->weight_set_positions > 0);
1546 vector<int> w(arg->weight_set_positions);
1547 for (unsigned i = 0; i < b->size; ++i) {
1548 for (unsigned j = 0; j < arg->weight_set_positions; ++j) {
1549 crush_weight_set *weight_set = &arg->weight_set[j];
1550 w[j] += weight_set->weights[i];
1551 }
1552 }
1553 ldout(cct,5) << __func__ << " adjusting bucket " << bucket_id
1554 << " cmap " << p.first << " weights to " << w << dendl;
1555 ostringstream ss;
1556 choose_args_adjust_item_weight(cct, cmap, bucket_id, w, &ss);
1557 }
1558 if (!changed) {
1559 return -ENOENT;
1560 }
1561 return changed;
1562 }
1563
1564 int CrushWrapper::adjust_item_weight_in_loc(
1565 CephContext *cct, int id, int weight,
1566 const map<string,string>& loc,
1567 bool update_weight_sets)
1568 {
1569 ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight
1570 << " in " << loc
1571 << " update_weight_sets=" << (int)update_weight_sets
1572 << dendl;
1573 int changed = 0;
1574 for (auto l = loc.begin(); l != loc.end(); ++l) {
1575 int bid = get_item_id(l->second);
1576 if (!bucket_exists(bid))
1577 continue;
1578 int r = adjust_item_weight_in_bucket(cct, id, weight, bid,
1579 update_weight_sets);
1580 if (r > 0) {
1581 ++changed;
1582 }
1583 }
1584 if (!changed) {
1585 return -ENOENT;
1586 }
1587 return changed;
1588 }
1589
1590 int CrushWrapper::adjust_subtree_weight(CephContext *cct, int id, int weight,
1591 bool update_weight_sets)
1592 {
1593 ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl;
1594 crush_bucket *b = get_bucket(id);
1595 if (IS_ERR(b))
1596 return PTR_ERR(b);
1597 int changed = 0;
1598 list<crush_bucket*> q;
1599 q.push_back(b);
1600 while (!q.empty()) {
1601 b = q.front();
1602 q.pop_front();
1603 int local_changed = 0;
1604 for (unsigned i=0; i<b->size; ++i) {
1605 int n = b->items[i];
1606 if (n >= 0) {
1607 adjust_item_weight_in_bucket(cct, n, weight, b->id, update_weight_sets);
1608 ++changed;
1609 ++local_changed;
1610 } else {
1611 crush_bucket *sub = get_bucket(n);
1612 if (IS_ERR(sub))
1613 continue;
1614 q.push_back(sub);
1615 }
1616 }
1617 }
1618 int ret = rebuild_roots_with_classes(cct);
1619 if (ret < 0) {
1620 ldout(cct, 0) << __func__ << " unable to rebuild roots with classes: "
1621 << cpp_strerror(ret) << dendl;
1622 return ret;
1623 }
1624 return changed;
1625 }
1626
1627 bool CrushWrapper::check_item_present(int id) const
1628 {
1629 bool found = false;
1630
1631 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1632 crush_bucket *b = crush->buckets[bidx];
1633 if (b == 0)
1634 continue;
1635 for (unsigned i = 0; i < b->size; i++)
1636 if (b->items[i] == id)
1637 found = true;
1638 }
1639 return found;
1640 }
1641
1642
1643 pair<string,string> CrushWrapper::get_immediate_parent(int id, int *_ret) const
1644 {
1645
1646 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1647 crush_bucket *b = crush->buckets[bidx];
1648 if (b == 0)
1649 continue;
1650 if (is_shadow_item(b->id))
1651 continue;
1652 for (unsigned i = 0; i < b->size; i++)
1653 if (b->items[i] == id) {
1654 string parent_id = name_map.at(b->id);
1655 string parent_bucket_type = type_map.at(b->type);
1656 if (_ret)
1657 *_ret = 0;
1658 return make_pair(parent_bucket_type, parent_id);
1659 }
1660 }
1661
1662 if (_ret)
1663 *_ret = -ENOENT;
1664
1665 return pair<string, string>();
1666 }
1667
1668 int CrushWrapper::get_immediate_parent_id(int id, int *parent) const
1669 {
1670 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1671 crush_bucket *b = crush->buckets[bidx];
1672 if (b == 0)
1673 continue;
1674 if (is_shadow_item(b->id))
1675 continue;
1676 for (unsigned i = 0; i < b->size; i++) {
1677 if (b->items[i] == id) {
1678 *parent = b->id;
1679 return 0;
1680 }
1681 }
1682 }
1683 return -ENOENT;
1684 }
1685
1686 int CrushWrapper::get_parent_of_type(int item, int type, int rule) const
1687 {
1688 if (rule < 0) {
1689 // no rule specified
1690 do {
1691 int r = get_immediate_parent_id(item, &item);
1692 if (r < 0) {
1693 return 0;
1694 }
1695 } while (get_bucket_type(item) != type);
1696 return item;
1697 }
1698 set<int> roots;
1699 find_takes_by_rule(rule, &roots);
1700 for (auto root : roots) {
1701 vector<int> candidates;
1702 get_children_of_type(root, type, &candidates, false);
1703 for (auto candidate : candidates) {
1704 if (subtree_contains(candidate, item)) {
1705 // note that here we assure that no two different buckets
1706 // from a single crush rule will share a same device,
1707 // which should generally be true.
1708 return candidate;
1709 }
1710 }
1711 }
1712 return 0; // not found
1713 }
1714
1715 void CrushWrapper::get_subtree_of_type(int type, vector<int> *subtrees)
1716 {
1717 set<int> roots;
1718 find_roots(&roots);
1719 for (auto r: roots) {
1720 crush_bucket *b = get_bucket(r);
1721 if (IS_ERR(b))
1722 continue;
1723 get_children_of_type(b->id, type, subtrees);
1724 }
1725 }
1726
1727 bool CrushWrapper::class_is_in_use(int class_id, ostream *ss)
1728 {
1729 list<unsigned> rules;
1730 for (unsigned i = 0; i < crush->max_rules; ++i) {
1731 crush_rule *r = crush->rules[i];
1732 if (!r)
1733 continue;
1734 for (unsigned j = 0; j < r->len; ++j) {
1735 if (r->steps[j].op == CRUSH_RULE_TAKE) {
1736 int root = r->steps[j].arg1;
1737 for (auto &p : class_bucket) {
1738 auto& q = p.second;
1739 if (q.count(class_id) && q[class_id] == root) {
1740 rules.push_back(i);
1741 }
1742 }
1743 }
1744 }
1745 }
1746 if (rules.empty()) {
1747 return false;
1748 }
1749 if (ss) {
1750 ostringstream os;
1751 for (auto &p: rules) {
1752 os << "'" << get_rule_name(p) <<"',";
1753 }
1754 string out(os.str());
1755 out.resize(out.size() - 1); // drop last ','
1756 *ss << "still referenced by crush_rule(s): " << out;
1757 }
1758 return true;
1759 }
1760
1761 int CrushWrapper::rename_class(const string& srcname, const string& dstname)
1762 {
1763 auto i = class_rname.find(srcname);
1764 if (i == class_rname.end())
1765 return -ENOENT;
1766 auto j = class_rname.find(dstname);
1767 if (j != class_rname.end())
1768 return -EEXIST;
1769
1770 int class_id = i->second;
1771 ceph_assert(class_name.count(class_id));
1772 // rename any shadow buckets of old class name
1773 for (auto &it: class_map) {
1774 if (it.first < 0 && it.second == class_id) {
1775 string old_name = get_item_name(it.first);
1776 size_t pos = old_name.find("~");
1777 ceph_assert(pos != string::npos);
1778 string name_no_class = old_name.substr(0, pos);
1779 string old_class_name = old_name.substr(pos + 1);
1780 ceph_assert(old_class_name == srcname);
1781 string new_name = name_no_class + "~" + dstname;
1782 // we do not use set_item_name
1783 // because the name is intentionally invalid
1784 name_map[it.first] = new_name;
1785 have_rmaps = false;
1786 }
1787 }
1788
1789 // rename class
1790 class_rname.erase(srcname);
1791 class_name.erase(class_id);
1792 class_rname[dstname] = class_id;
1793 class_name[class_id] = dstname;
1794 return 0;
1795 }
1796
1797 int CrushWrapper::populate_classes(
1798 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket)
1799 {
1800 // build set of previous used shadow ids
1801 set<int32_t> used_ids;
1802 for (auto& p : old_class_bucket) {
1803 for (auto& q : p.second) {
1804 used_ids.insert(q.second);
1805 }
1806 }
1807 // accumulate weight values for each carg and bucket as we go. because it is
1808 // depth first, we will have the nested bucket weights we need when we
1809 // finish constructing the containing buckets.
1810 map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> [bucket weight for each position]
1811 set<int> roots;
1812 find_nonshadow_roots(&roots);
1813 for (auto &r : roots) {
1814 if (r >= 0)
1815 continue;
1816 for (auto &c : class_name) {
1817 int clone;
1818 int res = device_class_clone(r, c.first, old_class_bucket, used_ids,
1819 &clone, &cmap_item_weight);
1820 if (res < 0)
1821 return res;
1822 }
1823 }
1824 return 0;
1825 }
1826
1827 int CrushWrapper::trim_roots_with_class(CephContext *cct)
1828 {
1829 set<int> roots;
1830 find_shadow_roots(&roots);
1831 for (auto &r : roots) {
1832 if (r >= 0)
1833 continue;
1834 int res = remove_root(cct, r);
1835 if (res)
1836 return res;
1837 }
1838 // there is no need to reweight because we only remove from the
1839 // root and down
1840 return 0;
1841 }
1842
1843 int32_t CrushWrapper::_alloc_class_id() const {
1844 if (class_name.empty()) {
1845 return 0;
1846 }
1847 int32_t class_id = class_name.rbegin()->first + 1;
1848 if (class_id >= 0) {
1849 return class_id;
1850 }
1851 // wrapped, pick a random start and do exhaustive search
1852 uint32_t upperlimit = std::numeric_limits<int32_t>::max();
1853 upperlimit++;
1854 class_id = rand() % upperlimit;
1855 const auto start = class_id;
1856 do {
1857 if (!class_name.count(class_id)) {
1858 return class_id;
1859 } else {
1860 class_id++;
1861 if (class_id < 0) {
1862 class_id = 0;
1863 }
1864 }
1865 } while (class_id != start);
1866 ceph_abort_msg("no available class id");
1867 }
1868
1869 int CrushWrapper::set_subtree_class(
1870 const string& subtree,
1871 const string& new_class)
1872 {
1873 if (!name_exists(subtree)) {
1874 return -ENOENT;
1875 }
1876
1877 int new_class_id = get_or_create_class_id(new_class);
1878 int id = get_item_id(subtree);
1879 list<int> q = { id };
1880 while (!q.empty()) {
1881 int id = q.front();
1882 q.pop_front();
1883 crush_bucket *b = get_bucket(id);
1884 if (IS_ERR(b)) {
1885 return PTR_ERR(b);
1886 }
1887 for (unsigned i = 0; i < b->size; ++i) {
1888 int item = b->items[i];
1889 if (item >= 0) {
1890 class_map[item] = new_class_id;
1891 } else {
1892 q.push_back(item);
1893 }
1894 }
1895 }
1896 return 0;
1897 }
1898
1899 int CrushWrapper::reclassify(
1900 CephContext *cct,
1901 ostream& out,
1902 const map<string,string>& classify_root,
1903 const map<string,pair<string,string>>& classify_bucket
1904 )
1905 {
1906 map<int,string> reclassified_bucket; // orig_id -> class
1907
1908 // classify_root
1909 for (auto& i : classify_root) {
1910 string root = i.first;
1911 if (!name_exists(root)) {
1912 out << "root " << root << " does not exist" << std::endl;
1913 return -EINVAL;
1914 }
1915 int root_id = get_item_id(root);
1916 string new_class = i.second;
1917 int new_class_id = get_or_create_class_id(new_class);
1918 out << "classify_root " << root << " (" << root_id
1919 << ") as " << new_class << std::endl;
1920
1921 // validate rules
1922 for (unsigned j = 0; j < crush->max_rules; j++) {
1923 if (crush->rules[j]) {
1924 auto rule = crush->rules[j];
1925 for (unsigned k = 0; k < rule->len; ++k) {
1926 if (rule->steps[k].op == CRUSH_RULE_TAKE) {
1927 int step_item = get_rule_arg1(j, k);
1928 int original_item;
1929 int c;
1930 int res = split_id_class(step_item, &original_item, &c);
1931 if (res < 0)
1932 return res;
1933 if (c >= 0) {
1934 if (original_item == root_id) {
1935 out << " rule " << j << " includes take on root "
1936 << root << " class " << c << std::endl;
1937 return -EINVAL;
1938 }
1939 }
1940 }
1941 }
1942 }
1943 }
1944
1945 // rebuild new buckets for root
1946 //cout << "before class_bucket: " << class_bucket << std::endl;
1947 map<int,int> renumber;
1948 list<int> q;
1949 q.push_back(root_id);
1950 while (!q.empty()) {
1951 int id = q.front();
1952 q.pop_front();
1953 crush_bucket *bucket = get_bucket(id);
1954 if (IS_ERR(bucket)) {
1955 out << "cannot find bucket " << id
1956 << ": " << cpp_strerror(PTR_ERR(bucket)) << std::endl;
1957 return PTR_ERR(bucket);
1958 }
1959
1960 // move bucket
1961 int new_id = get_new_bucket_id();
1962 out << " renumbering bucket " << id << " -> " << new_id << std::endl;
1963 renumber[id] = new_id;
1964 crush->buckets[-1-new_id] = bucket;
1965 bucket->id = new_id;
1966 crush->buckets[-1-id] = crush_make_bucket(crush,
1967 bucket->alg,
1968 bucket->hash,
1969 bucket->type,
1970 0, NULL, NULL);
1971 crush->buckets[-1-id]->id = id;
1972 for (auto& i : choose_args) {
1973 i.second.args[-1-new_id] = i.second.args[-1-id];
1974 memset(&i.second.args[-1-id], 0, sizeof(i.second.args[0]));
1975 }
1976 class_bucket.erase(id);
1977 class_bucket[new_id][new_class_id] = id;
1978 name_map[new_id] = string(get_item_name(id));
1979 name_map[id] = string(get_item_name(id)) + "~" + new_class;
1980
1981 for (unsigned j = 0; j < bucket->size; ++j) {
1982 if (bucket->items[j] < 0) {
1983 q.push_front(bucket->items[j]);
1984 } else {
1985 // we don't reclassify the device here; if the users wants that,
1986 // they can pass --set-subtree-class separately.
1987 }
1988 }
1989 }
1990 //cout << "mid class_bucket: " << class_bucket << std::endl;
1991
1992 for (int i = 0; i < crush->max_buckets; ++i) {
1993 crush_bucket *b = crush->buckets[i];
1994 if (!b) {
1995 continue;
1996 }
1997 for (unsigned j = 0; j < b->size; ++j) {
1998 if (renumber.count(b->items[j])) {
1999 b->items[j] = renumber[b->items[j]];
2000 }
2001 }
2002 }
2003
2004 int r = rebuild_roots_with_classes(cct);
2005 if (r < 0) {
2006 out << "failed to rebuild_roots_with_classes: " << cpp_strerror(r)
2007 << std::endl;
2008 return r;
2009 }
2010 //cout << "final class_bucket: " << class_bucket << std::endl;
2011 }
2012
2013 // classify_bucket
2014 map<int,int> send_to; // source bucket -> dest bucket
2015 map<int,map<int,int>> new_class_bucket;
2016 map<int,string> new_bucket_names;
2017 map<int,map<string,string>> new_buckets;
2018 map<string,int> new_bucket_by_name;
2019 for (auto& i : classify_bucket) {
2020 const string& match = i.first; // prefix% or %suffix
2021 const string& new_class = i.second.first;
2022 const string& default_parent = i.second.second;
2023 if (!name_exists(default_parent)) {
2024 out << "default parent " << default_parent << " does not exist"
2025 << std::endl;
2026 return -EINVAL;
2027 }
2028 int default_parent_id = get_item_id(default_parent);
2029 crush_bucket *default_parent_bucket = get_bucket(default_parent_id);
2030 assert(default_parent_bucket);
2031 string default_parent_type_name = get_type_name(default_parent_bucket->type);
2032
2033 out << "classify_bucket " << match << " as " << new_class
2034 << " default bucket " << default_parent
2035 << " (" << default_parent_type_name << ")" << std::endl;
2036
2037 int new_class_id = get_or_create_class_id(new_class);
2038 for (int j = 0; j < crush->max_buckets; ++j) {
2039 crush_bucket *b = crush->buckets[j];
2040 if (!b || is_shadow_item(b->id)) {
2041 continue;
2042 }
2043 string name = get_item_name(b->id);
2044 if (name.length() < match.length()) {
2045 continue;
2046 }
2047 string basename;
2048 if (match[0] == '%') {
2049 if (match.substr(1) != name.substr(name.size() - match.size() + 1)) {
2050 continue;
2051 }
2052 basename = name.substr(0, name.size() - match.size() + 1);
2053 } else if (match[match.size() - 1] == '%') {
2054 if (match.substr(0, match.size() - 1) !=
2055 name.substr(0, match.size() - 1)) {
2056 continue;
2057 }
2058 basename = name.substr(match.size() - 1);
2059 } else if (match == name) {
2060 basename = default_parent;
2061 } else {
2062 continue;
2063 }
2064 cout << "match " << match << " to " << name << " basename " << basename
2065 << std::endl;
2066 // look up or create basename bucket
2067 int base_id;
2068 if (name_exists(basename)) {
2069 base_id = get_item_id(basename);
2070 cout << " have base " << base_id << std::endl;
2071 } else if (new_bucket_by_name.count(basename)) {
2072 base_id = new_bucket_by_name[basename];
2073 cout << " already creating base " << base_id << std::endl;
2074 } else {
2075 base_id = get_new_bucket_id();
2076 crush->buckets[-1-base_id] = crush_make_bucket(crush,
2077 b->alg,
2078 b->hash,
2079 b->type,
2080 0, NULL, NULL);
2081 crush->buckets[-1-base_id]->id = base_id;
2082 name_map[base_id] = basename;
2083 new_bucket_by_name[basename] = base_id;
2084 cout << " created base " << base_id << std::endl;
2085
2086 new_buckets[base_id][default_parent_type_name] = default_parent;
2087 }
2088 send_to[b->id] = base_id;
2089 new_class_bucket[base_id][new_class_id] = b->id;
2090 new_bucket_names[b->id] = basename + "~" + get_class_name(new_class_id);
2091
2092 // make sure devices are classified
2093 for (unsigned i = 0; i < b->size; ++i) {
2094 int item = b->items[i];
2095 if (item >= 0) {
2096 class_map[item] = new_class_id;
2097 }
2098 }
2099 }
2100 }
2101
2102 // no name_exists() works below,
2103 have_rmaps = false;
2104
2105 // copy items around
2106 //cout << "send_to " << send_to << std::endl;
2107 set<int> roots;
2108 find_roots(&roots);
2109 for (auto& i : send_to) {
2110 crush_bucket *from = get_bucket(i.first);
2111 crush_bucket *to = get_bucket(i.second);
2112 cout << "moving items from " << from->id << " (" << get_item_name(from->id)
2113 << ") to " << to->id << " (" << get_item_name(to->id) << ")"
2114 << std::endl;
2115 for (unsigned j = 0; j < from->size; ++j) {
2116 int item = from->items[j];
2117 int r;
2118 map<string,string> to_loc;
2119 to_loc[get_type_name(to->type)] = get_item_name(to->id);
2120 if (item >= 0) {
2121 if (subtree_contains(to->id, item)) {
2122 continue;
2123 }
2124 map<string,string> from_loc;
2125 from_loc[get_type_name(from->type)] = get_item_name(from->id);
2126 auto w = get_item_weightf_in_loc(item, from_loc);
2127 r = insert_item(cct, item,
2128 w,
2129 get_item_name(item),
2130 to_loc);
2131 } else {
2132 if (!send_to.count(item)) {
2133 lderr(cct) << "item " << item << " in bucket " << from->id
2134 << " is not also a reclassified bucket" << dendl;
2135 return -EINVAL;
2136 }
2137 int newitem = send_to[item];
2138 if (subtree_contains(to->id, newitem)) {
2139 continue;
2140 }
2141 r = link_bucket(cct, newitem, to_loc);
2142 }
2143 if (r != 0) {
2144 cout << __func__ << " err from insert_item: " << cpp_strerror(r)
2145 << std::endl;
2146 return r;
2147 }
2148 }
2149 }
2150
2151 // make sure new buckets have parents
2152 for (auto& i : new_buckets) {
2153 int parent;
2154 if (get_immediate_parent_id(i.first, &parent) < 0) {
2155 cout << "new bucket " << i.first << " missing parent, adding at "
2156 << i.second << std::endl;
2157 int r = link_bucket(cct, i.first, i.second);
2158 if (r != 0) {
2159 cout << __func__ << " err from insert_item: " << cpp_strerror(r)
2160 << std::endl;
2161 return r;
2162 }
2163 }
2164 }
2165
2166 // set class mappings
2167 //cout << "pre class_bucket: " << class_bucket << std::endl;
2168 for (auto& i : new_class_bucket) {
2169 for (auto& j : i.second) {
2170 class_bucket[i.first][j.first] = j.second;
2171 }
2172
2173 }
2174 //cout << "post class_bucket: " << class_bucket << std::endl;
2175 for (auto& i : new_bucket_names) {
2176 name_map[i.first] = i.second;
2177 }
2178
2179 int r = rebuild_roots_with_classes(cct);
2180 if (r < 0) {
2181 out << "failed to rebuild_roots_with_classes: " << cpp_strerror(r)
2182 << std::endl;
2183 return r;
2184 }
2185 //cout << "final class_bucket: " << class_bucket << std::endl;
2186
2187 return 0;
2188 }
2189
2190 int CrushWrapper::get_new_bucket_id()
2191 {
2192 int id = -1;
2193 while (crush->buckets[-1-id] &&
2194 -1-id < crush->max_buckets) {
2195 id--;
2196 }
2197 if (-1-id == crush->max_buckets) {
2198 ++crush->max_buckets;
2199 crush->buckets = (struct crush_bucket**)realloc(
2200 crush->buckets,
2201 sizeof(crush->buckets[0]) * crush->max_buckets);
2202 for (auto& i : choose_args) {
2203 assert(i.second.size == (__u32)crush->max_buckets - 1);
2204 ++i.second.size;
2205 i.second.args = (struct crush_choose_arg*)realloc(
2206 i.second.args,
2207 sizeof(i.second.args[0]) * i.second.size);
2208 }
2209 }
2210 return id;
2211 }
2212
2213 void CrushWrapper::reweight(CephContext *cct)
2214 {
2215 set<int> roots;
2216 find_nonshadow_roots(&roots);
2217 for (auto id : roots) {
2218 if (id >= 0)
2219 continue;
2220 crush_bucket *b = get_bucket(id);
2221 ldout(cct, 5) << "reweight root bucket " << id << dendl;
2222 int r = crush_reweight_bucket(crush, b);
2223 ceph_assert(r == 0);
2224
2225 for (auto& i : choose_args) {
2226 //cout << "carg " << i.first << std::endl;
2227 vector<uint32_t> w; // discard top-level weights
2228 reweight_bucket(b, i.second, &w);
2229 }
2230 }
2231 int r = rebuild_roots_with_classes(cct);
2232 ceph_assert(r == 0);
2233 }
2234
2235 void CrushWrapper::reweight_bucket(
2236 crush_bucket *b,
2237 crush_choose_arg_map& arg_map,
2238 vector<uint32_t> *weightv)
2239 {
2240 int idx = -1 - b->id;
2241 unsigned npos = arg_map.args[idx].weight_set_positions;
2242 //cout << __func__ << " " << b->id << " npos " << npos << std::endl;
2243 weightv->resize(npos);
2244 for (unsigned i = 0; i < b->size; ++i) {
2245 int item = b->items[i];
2246 if (item >= 0) {
2247 for (unsigned pos = 0; pos < npos; ++pos) {
2248 (*weightv)[pos] += arg_map.args[idx].weight_set->weights[i];
2249 }
2250 } else {
2251 vector<uint32_t> subw(npos);
2252 crush_bucket *sub = get_bucket(item);
2253 assert(sub);
2254 reweight_bucket(sub, arg_map, &subw);
2255 for (unsigned pos = 0; pos < npos; ++pos) {
2256 (*weightv)[pos] += subw[pos];
2257 // strash the real bucket weight as the weights for this reference
2258 arg_map.args[idx].weight_set->weights[i] = subw[pos];
2259 }
2260 }
2261 }
2262 //cout << __func__ << " finish " << b->id << " " << *weightv << std::endl;
2263 }
2264
2265 int CrushWrapper::add_simple_rule_at(
2266 string name, string root_name,
2267 string failure_domain_name,
2268 string device_class,
2269 string mode, int rule_type,
2270 int rno,
2271 ostream *err)
2272 {
2273 if (rule_exists(name)) {
2274 if (err)
2275 *err << "rule " << name << " exists";
2276 return -EEXIST;
2277 }
2278 if (rno >= 0) {
2279 if (rule_exists(rno)) {
2280 if (err)
2281 *err << "rule with ruleno " << rno << " exists";
2282 return -EEXIST;
2283 }
2284 if (ruleset_exists(rno)) {
2285 if (err)
2286 *err << "ruleset " << rno << " exists";
2287 return -EEXIST;
2288 }
2289 } else {
2290 for (rno = 0; rno < get_max_rules(); rno++) {
2291 if (!rule_exists(rno) && !ruleset_exists(rno))
2292 break;
2293 }
2294 }
2295 if (!name_exists(root_name)) {
2296 if (err)
2297 *err << "root item " << root_name << " does not exist";
2298 return -ENOENT;
2299 }
2300 int root = get_item_id(root_name);
2301 int type = 0;
2302 if (failure_domain_name.length()) {
2303 type = get_type_id(failure_domain_name);
2304 if (type < 0) {
2305 if (err)
2306 *err << "unknown type " << failure_domain_name;
2307 return -EINVAL;
2308 }
2309 }
2310 if (device_class.size()) {
2311 if (!class_exists(device_class)) {
2312 if (err)
2313 *err << "device class " << device_class << " does not exist";
2314 return -EINVAL;
2315 }
2316 int c = get_class_id(device_class);
2317 if (class_bucket.count(root) == 0 ||
2318 class_bucket[root].count(c) == 0) {
2319 if (err)
2320 *err << "root " << root_name << " has no devices with class "
2321 << device_class;
2322 return -EINVAL;
2323 }
2324 root = class_bucket[root][c];
2325 }
2326 if (mode != "firstn" && mode != "indep") {
2327 if (err)
2328 *err << "unknown mode " << mode;
2329 return -EINVAL;
2330 }
2331
2332 int steps = 3;
2333 if (mode == "indep")
2334 steps = 5;
2335 int min_rep = mode == "firstn" ? 1 : 3;
2336 int max_rep = mode == "firstn" ? 10 : 20;
2337 //set the ruleset the same as rule_id(rno)
2338 crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_rep, max_rep);
2339 ceph_assert(rule);
2340 int step = 0;
2341 if (mode == "indep") {
2342 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
2343 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
2344 }
2345 crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
2346 if (type)
2347 crush_rule_set_step(rule, step++,
2348 mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
2349 CRUSH_RULE_CHOOSELEAF_INDEP,
2350 CRUSH_CHOOSE_N,
2351 type);
2352 else
2353 crush_rule_set_step(rule, step++,
2354 mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
2355 CRUSH_RULE_CHOOSE_INDEP,
2356 CRUSH_CHOOSE_N,
2357 0);
2358 crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
2359
2360 int ret = crush_add_rule(crush, rule, rno);
2361 if(ret < 0) {
2362 *err << "failed to add rule " << rno << " because " << cpp_strerror(ret);
2363 return ret;
2364 }
2365 set_rule_name(rno, name);
2366 have_rmaps = false;
2367 return rno;
2368 }
2369
2370 int CrushWrapper::add_simple_rule(
2371 string name, string root_name,
2372 string failure_domain_name,
2373 string device_class,
2374 string mode, int rule_type,
2375 ostream *err)
2376 {
2377 return add_simple_rule_at(name, root_name, failure_domain_name, device_class,
2378 mode,
2379 rule_type, -1, err);
2380 }
2381
2382 float CrushWrapper::_get_take_weight_osd_map(int root,
2383 map<int,float> *pmap) const
2384 {
2385 float sum = 0.0;
2386 list<int> q;
2387 q.push_back(root);
2388 //breadth first iterate the OSD tree
2389 while (!q.empty()) {
2390 int bno = q.front();
2391 q.pop_front();
2392 crush_bucket *b = crush->buckets[-1-bno];
2393 ceph_assert(b);
2394 for (unsigned j=0; j<b->size; ++j) {
2395 int item_id = b->items[j];
2396 if (item_id >= 0) { //it's an OSD
2397 float w = crush_get_bucket_item_weight(b, j);
2398 (*pmap)[item_id] = w;
2399 sum += w;
2400 } else { //not an OSD, expand the child later
2401 q.push_back(item_id);
2402 }
2403 }
2404 }
2405 return sum;
2406 }
2407
2408 void CrushWrapper::_normalize_weight_map(float sum,
2409 const map<int,float>& m,
2410 map<int,float> *pmap) const
2411 {
2412 for (auto& p : m) {
2413 map<int,float>::iterator q = pmap->find(p.first);
2414 if (q == pmap->end()) {
2415 (*pmap)[p.first] = p.second / sum;
2416 } else {
2417 q->second += p.second / sum;
2418 }
2419 }
2420 }
2421
2422 int CrushWrapper::get_take_weight_osd_map(int root, map<int,float> *pmap) const
2423 {
2424 map<int,float> m;
2425 float sum = _get_take_weight_osd_map(root, &m);
2426 _normalize_weight_map(sum, m, pmap);
2427 return 0;
2428 }
2429
2430 int CrushWrapper::get_rule_weight_osd_map(unsigned ruleno,
2431 map<int,float> *pmap) const
2432 {
2433 if (ruleno >= crush->max_rules)
2434 return -ENOENT;
2435 if (crush->rules[ruleno] == NULL)
2436 return -ENOENT;
2437 crush_rule *rule = crush->rules[ruleno];
2438
2439 // build a weight map for each TAKE in the rule, and then merge them
2440
2441 // FIXME: if there are multiple takes that place a different number of
2442 // objects we do not take that into account. (Also, note that doing this
2443 // right is also a function of the pool, since the crush rule
2444 // might choose 2 + choose 2 but pool size may only be 3.)
2445 for (unsigned i=0; i<rule->len; ++i) {
2446 map<int,float> m;
2447 float sum = 0;
2448 if (rule->steps[i].op == CRUSH_RULE_TAKE) {
2449 int n = rule->steps[i].arg1;
2450 if (n >= 0) {
2451 m[n] = 1.0;
2452 sum = 1.0;
2453 } else {
2454 sum += _get_take_weight_osd_map(n, &m);
2455 }
2456 }
2457 _normalize_weight_map(sum, m, pmap);
2458 }
2459
2460 return 0;
2461 }
2462
2463 int CrushWrapper::remove_rule(int ruleno)
2464 {
2465 if (ruleno >= (int)crush->max_rules)
2466 return -ENOENT;
2467 if (crush->rules[ruleno] == NULL)
2468 return -ENOENT;
2469 crush_destroy_rule(crush->rules[ruleno]);
2470 crush->rules[ruleno] = NULL;
2471 rule_name_map.erase(ruleno);
2472 have_rmaps = false;
2473 return rebuild_roots_with_classes(nullptr);
2474 }
2475
2476 int CrushWrapper::bucket_adjust_item_weight(
2477 CephContext *cct, crush_bucket *bucket, int item, int weight,
2478 bool adjust_weight_sets)
2479 {
2480 if (adjust_weight_sets) {
2481 unsigned position;
2482 for (position = 0; position < bucket->size; position++)
2483 if (bucket->items[position] == item)
2484 break;
2485 ceph_assert(position != bucket->size);
2486 for (auto &w : choose_args) {
2487 crush_choose_arg_map &arg_map = w.second;
2488 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
2489 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
2490 crush_weight_set *weight_set = &arg->weight_set[j];
2491 weight_set->weights[position] = weight;
2492 }
2493 }
2494 }
2495 return crush_bucket_adjust_item_weight(crush, bucket, item, weight);
2496 }
2497
2498 int CrushWrapper::add_bucket(
2499 int bucketno, int alg, int hash, int type, int size,
2500 int *items, int *weights, int *idout)
2501 {
2502 if (alg == 0) {
2503 alg = get_default_bucket_alg();
2504 if (alg == 0)
2505 return -EINVAL;
2506 }
2507 crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items,
2508 weights);
2509 ceph_assert(b);
2510 ceph_assert(idout);
2511 int r = crush_add_bucket(crush, bucketno, b, idout);
2512 int pos = -1 - *idout;
2513 for (auto& p : choose_args) {
2514 crush_choose_arg_map& cmap = p.second;
2515 unsigned new_size = crush->max_buckets;
2516 if (cmap.args) {
2517 if ((int)cmap.size < crush->max_buckets) {
2518 cmap.args = static_cast<crush_choose_arg*>(realloc(
2519 cmap.args,
2520 sizeof(crush_choose_arg) * new_size));
2521 ceph_assert(cmap.args);
2522 memset(&cmap.args[cmap.size], 0,
2523 sizeof(crush_choose_arg) * (new_size - cmap.size));
2524 cmap.size = new_size;
2525 }
2526 } else {
2527 cmap.args = static_cast<crush_choose_arg*>(calloc(sizeof(crush_choose_arg),
2528 new_size));
2529 ceph_assert(cmap.args);
2530 cmap.size = new_size;
2531 }
2532 if (size > 0) {
2533 int positions = get_choose_args_positions(cmap);
2534 crush_choose_arg& carg = cmap.args[pos];
2535 carg.weight_set = static_cast<crush_weight_set*>(calloc(sizeof(crush_weight_set),
2536 size));
2537 carg.weight_set_positions = positions;
2538 for (int ppos = 0; ppos < positions; ++ppos) {
2539 carg.weight_set[ppos].weights = (__u32*)calloc(sizeof(__u32), size);
2540 carg.weight_set[ppos].size = size;
2541 for (int bpos = 0; bpos < size; ++bpos) {
2542 carg.weight_set[ppos].weights[bpos] = weights[bpos];
2543 }
2544 }
2545 }
2546 assert(crush->max_buckets == (int)cmap.size);
2547 }
2548 return r;
2549 }
2550
2551 int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight)
2552 {
2553 __u32 new_size = bucket->size + 1;
2554 int r = crush_bucket_add_item(crush, bucket, item, weight);
2555 if (r < 0) {
2556 return r;
2557 }
2558 for (auto &w : choose_args) {
2559 crush_choose_arg_map &arg_map = w.second;
2560 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
2561 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
2562 crush_weight_set *weight_set = &arg->weight_set[j];
2563 weight_set->weights = (__u32*)realloc(weight_set->weights,
2564 new_size * sizeof(__u32));
2565 ceph_assert(weight_set->size + 1 == new_size);
2566 weight_set->weights[weight_set->size] = weight;
2567 weight_set->size = new_size;
2568 }
2569 if (arg->ids_size) {
2570 arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
2571 ceph_assert(arg->ids_size + 1 == new_size);
2572 arg->ids[arg->ids_size] = item;
2573 arg->ids_size = new_size;
2574 }
2575 }
2576 return 0;
2577 }
2578
2579 int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
2580 {
2581 __u32 new_size = bucket->size - 1;
2582 unsigned position;
2583 for (position = 0; position < bucket->size; position++)
2584 if (bucket->items[position] == item)
2585 break;
2586 ceph_assert(position != bucket->size);
2587 int r = crush_bucket_remove_item(crush, bucket, item);
2588 if (r < 0) {
2589 return r;
2590 }
2591 for (auto &w : choose_args) {
2592 crush_choose_arg_map &arg_map = w.second;
2593 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
2594 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
2595 crush_weight_set *weight_set = &arg->weight_set[j];
2596 ceph_assert(weight_set->size - 1 == new_size);
2597 for (__u32 k = position; k < new_size; k++)
2598 weight_set->weights[k] = weight_set->weights[k+1];
2599 if (new_size) {
2600 weight_set->weights = (__u32*)realloc(weight_set->weights,
2601 new_size * sizeof(__u32));
2602 } else {
2603 free(weight_set->weights);
2604 weight_set->weights = NULL;
2605 }
2606 weight_set->size = new_size;
2607 }
2608 if (arg->ids_size) {
2609 ceph_assert(arg->ids_size - 1 == new_size);
2610 for (__u32 k = position; k < new_size; k++)
2611 arg->ids[k] = arg->ids[k+1];
2612 if (new_size) {
2613 arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
2614 } else {
2615 free(arg->ids);
2616 arg->ids = NULL;
2617 }
2618 arg->ids_size = new_size;
2619 }
2620 }
2621 return 0;
2622 }
2623
2624 int CrushWrapper::bucket_set_alg(int bid, int alg)
2625 {
2626 crush_bucket *b = get_bucket(bid);
2627 if (!b) {
2628 return -ENOENT;
2629 }
2630 b->alg = alg;
2631 return 0;
2632 }
2633
2634 int CrushWrapper::update_device_class(int id,
2635 const string& class_name,
2636 const string& name,
2637 ostream *ss)
2638 {
2639 ceph_assert(item_exists(id));
2640 auto old_class_name = get_item_class(id);
2641 if (old_class_name && old_class_name != class_name) {
2642 *ss << "osd." << id << " has already bound to class '" << old_class_name
2643 << "', can not reset class to '" << class_name << "'; "
2644 << "use 'ceph osd crush rm-device-class <id>' to "
2645 << "remove old class first";
2646 return -EBUSY;
2647 }
2648
2649 int class_id = get_or_create_class_id(class_name);
2650 if (id < 0) {
2651 *ss << name << " id " << id << " is negative";
2652 return -EINVAL;
2653 }
2654
2655 if (class_map.count(id) != 0 && class_map[id] == class_id) {
2656 *ss << name << " already set to class " << class_name << ". ";
2657 return 0;
2658 }
2659
2660 set_item_class(id, class_id);
2661
2662 int r = rebuild_roots_with_classes(nullptr);
2663 if (r < 0)
2664 return r;
2665 return 1;
2666 }
2667
2668 int CrushWrapper::remove_device_class(CephContext *cct, int id, ostream *ss)
2669 {
2670 ceph_assert(ss);
2671 const char *name = get_item_name(id);
2672 if (!name) {
2673 *ss << "osd." << id << " does not have a name";
2674 return -ENOENT;
2675 }
2676
2677 const char *class_name = get_item_class(id);
2678 if (!class_name) {
2679 *ss << "osd." << id << " has not been bound to a specific class yet";
2680 return 0;
2681 }
2682 class_remove_item(id);
2683
2684 int r = rebuild_roots_with_classes(cct);
2685 if (r < 0) {
2686 *ss << "unable to rebuild roots with class '" << class_name << "' "
2687 << "of osd." << id << ": " << cpp_strerror(r);
2688 return r;
2689 }
2690 return 0;
2691 }
2692
2693 int CrushWrapper::device_class_clone(
2694 int original_id, int device_class,
2695 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
2696 const std::set<int32_t>& used_ids,
2697 int *clone,
2698 map<int,map<int,vector<int>>> *cmap_item_weight)
2699 {
2700 const char *item_name = get_item_name(original_id);
2701 if (item_name == NULL)
2702 return -ECHILD;
2703 const char *class_name = get_class_name(device_class);
2704 if (class_name == NULL)
2705 return -EBADF;
2706 string copy_name = item_name + string("~") + class_name;
2707 if (name_exists(copy_name)) {
2708 *clone = get_item_id(copy_name);
2709 return 0;
2710 }
2711
2712 crush_bucket *original = get_bucket(original_id);
2713 ceph_assert(!IS_ERR(original));
2714 crush_bucket *copy = crush_make_bucket(crush,
2715 original->alg,
2716 original->hash,
2717 original->type,
2718 0, NULL, NULL);
2719 ceph_assert(copy);
2720
2721 vector<unsigned> item_orig_pos; // new item pos -> orig item pos
2722 for (unsigned i = 0; i < original->size; i++) {
2723 int item = original->items[i];
2724 int weight = crush_get_bucket_item_weight(original, i);
2725 if (item >= 0) {
2726 if (class_map.count(item) != 0 && class_map[item] == device_class) {
2727 int res = crush_bucket_add_item(crush, copy, item, weight);
2728 if (res)
2729 return res;
2730 } else {
2731 continue;
2732 }
2733 } else {
2734 int child_copy_id;
2735 int res = device_class_clone(item, device_class, old_class_bucket,
2736 used_ids, &child_copy_id,
2737 cmap_item_weight);
2738 if (res < 0)
2739 return res;
2740 crush_bucket *child_copy = get_bucket(child_copy_id);
2741 ceph_assert(!IS_ERR(child_copy));
2742 res = crush_bucket_add_item(crush, copy, child_copy_id,
2743 child_copy->weight);
2744 if (res)
2745 return res;
2746 }
2747 item_orig_pos.push_back(i);
2748 }
2749 ceph_assert(item_orig_pos.size() == copy->size);
2750
2751 int bno = 0;
2752 if (old_class_bucket.count(original_id) &&
2753 old_class_bucket.at(original_id).count(device_class)) {
2754 bno = old_class_bucket.at(original_id).at(device_class);
2755 } else {
2756 // pick a new shadow bucket id that is not used by the current map
2757 // *or* any previous shadow buckets.
2758 bno = -1;
2759 while (((-1-bno) < crush->max_buckets && crush->buckets[-1-bno]) ||
2760 used_ids.count(bno)) {
2761 --bno;
2762 }
2763 }
2764 int res = crush_add_bucket(crush, bno, copy, clone);
2765 if (res)
2766 return res;
2767 ceph_assert(!bno || bno == *clone);
2768
2769 res = set_item_class(*clone, device_class);
2770 if (res < 0)
2771 return res;
2772
2773 // we do not use set_item_name because the name is intentionally invalid
2774 name_map[*clone] = copy_name;
2775 if (have_rmaps)
2776 name_rmap[copy_name] = *clone;
2777 class_bucket[original_id][device_class] = *clone;
2778
2779 // set up choose_args for the new bucket.
2780 for (auto& w : choose_args) {
2781 crush_choose_arg_map& cmap = w.second;
2782 if (crush->max_buckets > (int)cmap.size) {
2783 unsigned new_size = crush->max_buckets;
2784 cmap.args = static_cast<crush_choose_arg*>(realloc(cmap.args,
2785 new_size * sizeof(cmap.args[0])));
2786 ceph_assert(cmap.args);
2787 memset(cmap.args + cmap.size, 0,
2788 (new_size - cmap.size) * sizeof(cmap.args[0]));
2789 cmap.size = new_size;
2790 }
2791 auto& o = cmap.args[-1-original_id];
2792 auto& n = cmap.args[-1-bno];
2793 n.ids_size = 0; // FIXME: implement me someday
2794 n.weight_set_positions = o.weight_set_positions;
2795 n.weight_set = static_cast<crush_weight_set*>(calloc(
2796 n.weight_set_positions, sizeof(crush_weight_set)));
2797 for (size_t s = 0; s < n.weight_set_positions; ++s) {
2798 n.weight_set[s].size = copy->size;
2799 n.weight_set[s].weights = (__u32*)calloc(copy->size, sizeof(__u32));
2800 }
2801 for (size_t s = 0; s < n.weight_set_positions; ++s) {
2802 vector<int> bucket_weights(n.weight_set_positions);
2803 for (size_t i = 0; i < copy->size; ++i) {
2804 int item = copy->items[i];
2805 if (item >= 0) {
2806 n.weight_set[s].weights[i] = o.weight_set[s].weights[item_orig_pos[i]];
2807 } else if ((*cmap_item_weight)[w.first].count(item)) {
2808 n.weight_set[s].weights[i] = (*cmap_item_weight)[w.first][item][s];
2809 } else {
2810 n.weight_set[s].weights[i] = 0;
2811 }
2812 bucket_weights[s] += n.weight_set[s].weights[i];
2813 }
2814 (*cmap_item_weight)[w.first][bno] = bucket_weights;
2815 }
2816 }
2817 return 0;
2818 }
2819
2820 int CrushWrapper::get_rules_by_class(const string &class_name, set<int> *rules)
2821 {
2822 ceph_assert(rules);
2823 rules->clear();
2824 if (!class_exists(class_name)) {
2825 return -ENOENT;
2826 }
2827 int class_id = get_class_id(class_name);
2828 for (unsigned i = 0; i < crush->max_rules; ++i) {
2829 crush_rule *r = crush->rules[i];
2830 if (!r)
2831 continue;
2832 for (unsigned j = 0; j < r->len; ++j) {
2833 if (r->steps[j].op == CRUSH_RULE_TAKE) {
2834 int step_item = r->steps[j].arg1;
2835 int original_item;
2836 int c;
2837 int res = split_id_class(step_item, &original_item, &c);
2838 if (res < 0) {
2839 return res;
2840 }
2841 if (c != -1 && c == class_id) {
2842 rules->insert(i);
2843 break;
2844 }
2845 }
2846 }
2847 }
2848 return 0;
2849 }
2850
2851 // return rules that might reference the given osd
2852 int CrushWrapper::get_rules_by_osd(int osd, set<int> *rules)
2853 {
2854 ceph_assert(rules);
2855 rules->clear();
2856 if (osd < 0) {
2857 return -EINVAL;
2858 }
2859 for (unsigned i = 0; i < crush->max_rules; ++i) {
2860 crush_rule *r = crush->rules[i];
2861 if (!r)
2862 continue;
2863 for (unsigned j = 0; j < r->len; ++j) {
2864 if (r->steps[j].op == CRUSH_RULE_TAKE) {
2865 int step_item = r->steps[j].arg1;
2866 list<int> unordered;
2867 int rc = _get_leaves(step_item, &unordered);
2868 if (rc < 0) {
2869 return rc; // propagate fatal errors!
2870 }
2871 bool match = false;
2872 for (auto &o: unordered) {
2873 ceph_assert(o >= 0);
2874 if (o == osd) {
2875 match = true;
2876 break;
2877 }
2878 }
2879 if (match) {
2880 rules->insert(i);
2881 break;
2882 }
2883 }
2884 }
2885 }
2886 return 0;
2887 }
2888
2889 bool CrushWrapper::_class_is_dead(int class_id)
2890 {
2891 for (auto &p: class_map) {
2892 if (p.first >= 0 && p.second == class_id) {
2893 return false;
2894 }
2895 }
2896 for (unsigned i = 0; i < crush->max_rules; ++i) {
2897 crush_rule *r = crush->rules[i];
2898 if (!r)
2899 continue;
2900 for (unsigned j = 0; j < r->len; ++j) {
2901 if (r->steps[j].op == CRUSH_RULE_TAKE) {
2902 int root = r->steps[j].arg1;
2903 for (auto &p : class_bucket) {
2904 auto& q = p.second;
2905 if (q.count(class_id) && q[class_id] == root) {
2906 return false;
2907 }
2908 }
2909 }
2910 }
2911 }
2912 // no more referenced by any devices or crush rules
2913 return true;
2914 }
2915
2916 void CrushWrapper::cleanup_dead_classes()
2917 {
2918 auto p = class_name.begin();
2919 while (p != class_name.end()) {
2920 if (_class_is_dead(p->first)) {
2921 string n = p->second;
2922 ++p;
2923 remove_class_name(n);
2924 } else {
2925 ++p;
2926 }
2927 }
2928 }
2929
2930 int CrushWrapper::rebuild_roots_with_classes(CephContext *cct)
2931 {
2932 std::map<int32_t, map<int32_t, int32_t> > old_class_bucket = class_bucket;
2933 cleanup_dead_classes();
2934 int r = trim_roots_with_class(cct);
2935 if (r < 0)
2936 return r;
2937 class_bucket.clear();
2938 return populate_classes(old_class_bucket);
2939 }
2940
2941 void CrushWrapper::encode(bufferlist& bl, uint64_t features) const
2942 {
2943 using ceph::encode;
2944 ceph_assert(crush);
2945
2946 __u32 magic = CRUSH_MAGIC;
2947 encode(magic, bl);
2948
2949 encode(crush->max_buckets, bl);
2950 encode(crush->max_rules, bl);
2951 encode(crush->max_devices, bl);
2952
2953 bool encode_compat_choose_args = false;
2954 crush_choose_arg_map arg_map;
2955 memset(&arg_map, '\0', sizeof(arg_map));
2956 if (has_choose_args() &&
2957 !HAVE_FEATURE(features, CRUSH_CHOOSE_ARGS)) {
2958 ceph_assert(!has_incompat_choose_args());
2959 encode_compat_choose_args = true;
2960 arg_map = choose_args.begin()->second;
2961 }
2962
2963 // buckets
2964 for (int i=0; i<crush->max_buckets; i++) {
2965 __u32 alg = 0;
2966 if (crush->buckets[i]) alg = crush->buckets[i]->alg;
2967 encode(alg, bl);
2968 if (!alg)
2969 continue;
2970
2971 encode(crush->buckets[i]->id, bl);
2972 encode(crush->buckets[i]->type, bl);
2973 encode(crush->buckets[i]->alg, bl);
2974 encode(crush->buckets[i]->hash, bl);
2975 encode(crush->buckets[i]->weight, bl);
2976 encode(crush->buckets[i]->size, bl);
2977 for (unsigned j=0; j<crush->buckets[i]->size; j++)
2978 encode(crush->buckets[i]->items[j], bl);
2979
2980 switch (crush->buckets[i]->alg) {
2981 case CRUSH_BUCKET_UNIFORM:
2982 encode((reinterpret_cast<crush_bucket_uniform*>(crush->buckets[i]))->item_weight, bl);
2983 break;
2984
2985 case CRUSH_BUCKET_LIST:
2986 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
2987 encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->item_weights[j], bl);
2988 encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->sum_weights[j], bl);
2989 }
2990 break;
2991
2992 case CRUSH_BUCKET_TREE:
2993 encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes, bl);
2994 for (unsigned j=0; j<(reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes; j++)
2995 encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->node_weights[j], bl);
2996 break;
2997
2998 case CRUSH_BUCKET_STRAW:
2999 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
3000 encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->item_weights[j], bl);
3001 encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->straws[j], bl);
3002 }
3003 break;
3004
3005 case CRUSH_BUCKET_STRAW2:
3006 {
3007 __u32 *weights;
3008 if (encode_compat_choose_args &&
3009 arg_map.args[i].weight_set_positions > 0) {
3010 weights = arg_map.args[i].weight_set[0].weights;
3011 } else {
3012 weights = (reinterpret_cast<crush_bucket_straw2*>(crush->buckets[i]))->item_weights;
3013 }
3014 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
3015 encode(weights[j], bl);
3016 }
3017 }
3018 break;
3019
3020 default:
3021 ceph_abort();
3022 break;
3023 }
3024 }
3025
3026 // rules
3027 for (unsigned i=0; i<crush->max_rules; i++) {
3028 __u32 yes = crush->rules[i] ? 1:0;
3029 encode(yes, bl);
3030 if (!yes)
3031 continue;
3032
3033 encode(crush->rules[i]->len, bl);
3034 encode(crush->rules[i]->mask, bl);
3035 for (unsigned j=0; j<crush->rules[i]->len; j++)
3036 encode(crush->rules[i]->steps[j], bl);
3037 }
3038
3039 // name info
3040 encode(type_map, bl);
3041 encode(name_map, bl);
3042 encode(rule_name_map, bl);
3043
3044 // tunables
3045 encode(crush->choose_local_tries, bl);
3046 encode(crush->choose_local_fallback_tries, bl);
3047 encode(crush->choose_total_tries, bl);
3048 encode(crush->chooseleaf_descend_once, bl);
3049 encode(crush->chooseleaf_vary_r, bl);
3050 encode(crush->straw_calc_version, bl);
3051 encode(crush->allowed_bucket_algs, bl);
3052 if (features & CEPH_FEATURE_CRUSH_TUNABLES5) {
3053 encode(crush->chooseleaf_stable, bl);
3054 }
3055
3056 if (HAVE_FEATURE(features, SERVER_LUMINOUS)) {
3057 // device classes
3058 encode(class_map, bl);
3059 encode(class_name, bl);
3060 encode(class_bucket, bl);
3061
3062 // choose args
3063 __u32 size = (__u32)choose_args.size();
3064 encode(size, bl);
3065 for (auto c : choose_args) {
3066 encode(c.first, bl);
3067 crush_choose_arg_map arg_map = c.second;
3068 size = 0;
3069 for (__u32 i = 0; i < arg_map.size; i++) {
3070 crush_choose_arg *arg = &arg_map.args[i];
3071 if (arg->weight_set_positions == 0 &&
3072 arg->ids_size == 0)
3073 continue;
3074 size++;
3075 }
3076 encode(size, bl);
3077 for (__u32 i = 0; i < arg_map.size; i++) {
3078 crush_choose_arg *arg = &arg_map.args[i];
3079 if (arg->weight_set_positions == 0 &&
3080 arg->ids_size == 0)
3081 continue;
3082 encode(i, bl);
3083 encode(arg->weight_set_positions, bl);
3084 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
3085 crush_weight_set *weight_set = &arg->weight_set[j];
3086 encode(weight_set->size, bl);
3087 for (__u32 k = 0; k < weight_set->size; k++)
3088 encode(weight_set->weights[k], bl);
3089 }
3090 encode(arg->ids_size, bl);
3091 for (__u32 j = 0; j < arg->ids_size; j++)
3092 encode(arg->ids[j], bl);
3093 }
3094 }
3095 }
3096 }
3097
3098 static void decode_32_or_64_string_map(map<int32_t,string>& m, bufferlist::const_iterator& blp)
3099 {
3100 m.clear();
3101 __u32 n;
3102 decode(n, blp);
3103 while (n--) {
3104 __s32 key;
3105 decode(key, blp);
3106
3107 __u32 strlen;
3108 decode(strlen, blp);
3109 if (strlen == 0) {
3110 // der, key was actually 64-bits!
3111 decode(strlen, blp);
3112 }
3113 decode_nohead(strlen, m[key], blp);
3114 }
3115 }
3116
3117 void CrushWrapper::decode(bufferlist::const_iterator& blp)
3118 {
3119 using ceph::decode;
3120 create();
3121
3122 __u32 magic;
3123 decode(magic, blp);
3124 if (magic != CRUSH_MAGIC)
3125 throw ceph::buffer::malformed_input("bad magic number");
3126
3127 decode(crush->max_buckets, blp);
3128 decode(crush->max_rules, blp);
3129 decode(crush->max_devices, blp);
3130
3131 // legacy tunables, unless we decode something newer
3132 set_tunables_legacy();
3133
3134 try {
3135 // buckets
3136 crush->buckets = (crush_bucket**)calloc(1, crush->max_buckets * sizeof(crush_bucket*));
3137 for (int i=0; i<crush->max_buckets; i++) {
3138 decode_crush_bucket(&crush->buckets[i], blp);
3139 }
3140
3141 // rules
3142 crush->rules = (crush_rule**)calloc(1, crush->max_rules * sizeof(crush_rule*));
3143 for (unsigned i = 0; i < crush->max_rules; ++i) {
3144 __u32 yes;
3145 decode(yes, blp);
3146 if (!yes) {
3147 crush->rules[i] = NULL;
3148 continue;
3149 }
3150
3151 __u32 len;
3152 decode(len, blp);
3153 crush->rules[i] = reinterpret_cast<crush_rule*>(calloc(1, crush_rule_size(len)));
3154 crush->rules[i]->len = len;
3155 decode(crush->rules[i]->mask, blp);
3156 for (unsigned j=0; j<crush->rules[i]->len; j++)
3157 decode(crush->rules[i]->steps[j], blp);
3158 }
3159
3160 // name info
3161 // NOTE: we had a bug where we were incoding int instead of int32, which means the
3162 // 'key' field for these maps may be either 32 or 64 bits, depending. tolerate
3163 // both by assuming the string is always non-empty.
3164 decode_32_or_64_string_map(type_map, blp);
3165 decode_32_or_64_string_map(name_map, blp);
3166 decode_32_or_64_string_map(rule_name_map, blp);
3167
3168 // tunables
3169 if (!blp.end()) {
3170 decode(crush->choose_local_tries, blp);
3171 decode(crush->choose_local_fallback_tries, blp);
3172 decode(crush->choose_total_tries, blp);
3173 }
3174 if (!blp.end()) {
3175 decode(crush->chooseleaf_descend_once, blp);
3176 }
3177 if (!blp.end()) {
3178 decode(crush->chooseleaf_vary_r, blp);
3179 }
3180 if (!blp.end()) {
3181 decode(crush->straw_calc_version, blp);
3182 }
3183 if (!blp.end()) {
3184 decode(crush->allowed_bucket_algs, blp);
3185 }
3186 if (!blp.end()) {
3187 decode(crush->chooseleaf_stable, blp);
3188 }
3189 if (!blp.end()) {
3190 decode(class_map, blp);
3191 decode(class_name, blp);
3192 for (auto &c : class_name)
3193 class_rname[c.second] = c.first;
3194 decode(class_bucket, blp);
3195 }
3196 if (!blp.end()) {
3197 __u32 choose_args_size;
3198 decode(choose_args_size, blp);
3199 for (__u32 i = 0; i < choose_args_size; i++) {
3200 typename decltype(choose_args)::key_type choose_args_index;
3201 decode(choose_args_index, blp);
3202 crush_choose_arg_map arg_map;
3203 arg_map.size = crush->max_buckets;
3204 arg_map.args = static_cast<crush_choose_arg*>(calloc(
3205 arg_map.size, sizeof(crush_choose_arg)));
3206 __u32 size;
3207 decode(size, blp);
3208 for (__u32 j = 0; j < size; j++) {
3209 __u32 bucket_index;
3210 decode(bucket_index, blp);
3211 ceph_assert(bucket_index < arg_map.size);
3212 crush_choose_arg *arg = &arg_map.args[bucket_index];
3213 decode(arg->weight_set_positions, blp);
3214 if (arg->weight_set_positions) {
3215 arg->weight_set = static_cast<crush_weight_set*>(calloc(
3216 arg->weight_set_positions, sizeof(crush_weight_set)));
3217 for (__u32 k = 0; k < arg->weight_set_positions; k++) {
3218 crush_weight_set *weight_set = &arg->weight_set[k];
3219 decode(weight_set->size, blp);
3220 weight_set->weights = (__u32*)calloc(
3221 weight_set->size, sizeof(__u32));
3222 for (__u32 l = 0; l < weight_set->size; l++)
3223 decode(weight_set->weights[l], blp);
3224 }
3225 }
3226 decode(arg->ids_size, blp);
3227 if (arg->ids_size) {
3228 ceph_assert(arg->ids_size == crush->buckets[bucket_index]->size);
3229 arg->ids = (__s32 *)calloc(arg->ids_size, sizeof(__s32));
3230 for (__u32 k = 0; k < arg->ids_size; k++)
3231 decode(arg->ids[k], blp);
3232 }
3233 }
3234 choose_args[choose_args_index] = arg_map;
3235 }
3236 }
3237 update_choose_args(nullptr); // in case we decode a legacy "corrupted" map
3238 finalize();
3239 }
3240 catch (...) {
3241 crush_destroy(crush);
3242 throw;
3243 }
3244 }
3245
3246 void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::const_iterator &blp)
3247 {
3248 using ceph::decode;
3249 __u32 alg;
3250 decode(alg, blp);
3251 if (!alg) {
3252 *bptr = NULL;
3253 return;
3254 }
3255
3256 int size = 0;
3257 switch (alg) {
3258 case CRUSH_BUCKET_UNIFORM:
3259 size = sizeof(crush_bucket_uniform);
3260 break;
3261 case CRUSH_BUCKET_LIST:
3262 size = sizeof(crush_bucket_list);
3263 break;
3264 case CRUSH_BUCKET_TREE:
3265 size = sizeof(crush_bucket_tree);
3266 break;
3267 case CRUSH_BUCKET_STRAW:
3268 size = sizeof(crush_bucket_straw);
3269 break;
3270 case CRUSH_BUCKET_STRAW2:
3271 size = sizeof(crush_bucket_straw2);
3272 break;
3273 default:
3274 {
3275 char str[128];
3276 snprintf(str, sizeof(str), "unsupported bucket algorithm: %d", alg);
3277 throw ceph::buffer::malformed_input(str);
3278 }
3279 }
3280 crush_bucket *bucket = reinterpret_cast<crush_bucket*>(calloc(1, size));
3281 *bptr = bucket;
3282
3283 decode(bucket->id, blp);
3284 decode(bucket->type, blp);
3285 decode(bucket->alg, blp);
3286 decode(bucket->hash, blp);
3287 decode(bucket->weight, blp);
3288 decode(bucket->size, blp);
3289
3290 bucket->items = (__s32*)calloc(1, bucket->size * sizeof(__s32));
3291 for (unsigned j = 0; j < bucket->size; ++j) {
3292 decode(bucket->items[j], blp);
3293 }
3294
3295 switch (bucket->alg) {
3296 case CRUSH_BUCKET_UNIFORM:
3297 decode((reinterpret_cast<crush_bucket_uniform*>(bucket))->item_weight, blp);
3298 break;
3299
3300 case CRUSH_BUCKET_LIST: {
3301 crush_bucket_list* cbl = reinterpret_cast<crush_bucket_list*>(bucket);
3302 cbl->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3303 cbl->sum_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3304
3305 for (unsigned j = 0; j < bucket->size; ++j) {
3306 decode(cbl->item_weights[j], blp);
3307 decode(cbl->sum_weights[j], blp);
3308 }
3309 break;
3310 }
3311
3312 case CRUSH_BUCKET_TREE: {
3313 crush_bucket_tree* cbt = reinterpret_cast<crush_bucket_tree*>(bucket);
3314 decode(cbt->num_nodes, blp);
3315 cbt->node_weights = (__u32*)calloc(1, cbt->num_nodes * sizeof(__u32));
3316 for (unsigned j=0; j<cbt->num_nodes; j++) {
3317 decode(cbt->node_weights[j], blp);
3318 }
3319 break;
3320 }
3321
3322 case CRUSH_BUCKET_STRAW: {
3323 crush_bucket_straw* cbs = reinterpret_cast<crush_bucket_straw*>(bucket);
3324 cbs->straws = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3325 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3326 for (unsigned j = 0; j < bucket->size; ++j) {
3327 decode(cbs->item_weights[j], blp);
3328 decode(cbs->straws[j], blp);
3329 }
3330 break;
3331 }
3332
3333 case CRUSH_BUCKET_STRAW2: {
3334 crush_bucket_straw2* cbs = reinterpret_cast<crush_bucket_straw2*>(bucket);
3335 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
3336 for (unsigned j = 0; j < bucket->size; ++j) {
3337 decode(cbs->item_weights[j], blp);
3338 }
3339 break;
3340 }
3341
3342 default:
3343 // We should have handled this case in the first switch statement
3344 ceph_abort();
3345 break;
3346 }
3347 }
3348
3349
3350 void CrushWrapper::dump(Formatter *f) const
3351 {
3352 f->open_array_section("devices");
3353 for (int i=0; i<get_max_devices(); i++) {
3354 f->open_object_section("device");
3355 f->dump_int("id", i);
3356 const char *n = get_item_name(i);
3357 if (n) {
3358 f->dump_string("name", n);
3359 } else {
3360 char name[20];
3361 sprintf(name, "device%d", i);
3362 f->dump_string("name", name);
3363 }
3364 const char *device_class = get_item_class(i);
3365 if (device_class != NULL)
3366 f->dump_string("class", device_class);
3367 f->close_section();
3368 }
3369 f->close_section();
3370
3371 f->open_array_section("types");
3372 int n = get_num_type_names();
3373 for (int i=0; n; i++) {
3374 const char *name = get_type_name(i);
3375 if (!name) {
3376 if (i == 0) {
3377 f->open_object_section("type");
3378 f->dump_int("type_id", 0);
3379 f->dump_string("name", "device");
3380 f->close_section();
3381 }
3382 continue;
3383 }
3384 n--;
3385 f->open_object_section("type");
3386 f->dump_int("type_id", i);
3387 f->dump_string("name", name);
3388 f->close_section();
3389 }
3390 f->close_section();
3391
3392 f->open_array_section("buckets");
3393 for (int bucket = -1; bucket > -1-get_max_buckets(); --bucket) {
3394 if (!bucket_exists(bucket))
3395 continue;
3396 f->open_object_section("bucket");
3397 f->dump_int("id", bucket);
3398 if (get_item_name(bucket))
3399 f->dump_string("name", get_item_name(bucket));
3400 f->dump_int("type_id", get_bucket_type(bucket));
3401 if (get_type_name(get_bucket_type(bucket)))
3402 f->dump_string("type_name", get_type_name(get_bucket_type(bucket)));
3403 f->dump_int("weight", get_bucket_weight(bucket));
3404 f->dump_string("alg", crush_bucket_alg_name(get_bucket_alg(bucket)));
3405 f->dump_string("hash", crush_hash_name(get_bucket_hash(bucket)));
3406 f->open_array_section("items");
3407 for (int j=0; j<get_bucket_size(bucket); j++) {
3408 f->open_object_section("item");
3409 f->dump_int("id", get_bucket_item(bucket, j));
3410 f->dump_int("weight", get_bucket_item_weight(bucket, j));
3411 f->dump_int("pos", j);
3412 f->close_section();
3413 }
3414 f->close_section();
3415 f->close_section();
3416 }
3417 f->close_section();
3418
3419 f->open_array_section("rules");
3420 dump_rules(f);
3421 f->close_section();
3422
3423 f->open_object_section("tunables");
3424 dump_tunables(f);
3425 f->close_section();
3426
3427 dump_choose_args(f);
3428 }
3429
3430 namespace {
3431 // depth first walker
3432 class TreeDumper {
3433 typedef CrushTreeDumper::Item Item;
3434 const CrushWrapper *crush;
3435 const CrushTreeDumper::name_map_t& weight_set_names;
3436 public:
3437 explicit TreeDumper(const CrushWrapper *crush,
3438 const CrushTreeDumper::name_map_t& wsnames)
3439 : crush(crush), weight_set_names(wsnames) {}
3440
3441 void dump(Formatter *f) {
3442 set<int> roots;
3443 crush->find_roots(&roots);
3444 for (set<int>::iterator root = roots.begin(); root != roots.end(); ++root) {
3445 dump_item(Item(*root, 0, 0, crush->get_bucket_weightf(*root)), f);
3446 }
3447 }
3448
3449 private:
3450 void dump_item(const Item& qi, Formatter* f) {
3451 if (qi.is_bucket()) {
3452 f->open_object_section("bucket");
3453 CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f);
3454 dump_bucket_children(qi, f);
3455 f->close_section();
3456 } else {
3457 f->open_object_section("device");
3458 CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f);
3459 f->close_section();
3460 }
3461 }
3462
3463 void dump_bucket_children(const Item& parent, Formatter* f) {
3464 f->open_array_section("items");
3465 const int max_pos = crush->get_bucket_size(parent.id);
3466 for (int pos = 0; pos < max_pos; pos++) {
3467 int id = crush->get_bucket_item(parent.id, pos);
3468 float weight = crush->get_bucket_item_weightf(parent.id, pos);
3469 dump_item(Item(id, parent.id, parent.depth + 1, weight), f);
3470 }
3471 f->close_section();
3472 }
3473 };
3474 }
3475
3476 void CrushWrapper::dump_tree(
3477 Formatter *f,
3478 const CrushTreeDumper::name_map_t& weight_set_names) const
3479 {
3480 ceph_assert(f);
3481 TreeDumper(this, weight_set_names).dump(f);
3482 }
3483
3484 void CrushWrapper::dump_tunables(Formatter *f) const
3485 {
3486 f->dump_int("choose_local_tries", get_choose_local_tries());
3487 f->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries());
3488 f->dump_int("choose_total_tries", get_choose_total_tries());
3489 f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
3490 f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
3491 f->dump_int("chooseleaf_stable", get_chooseleaf_stable());
3492 f->dump_int("straw_calc_version", get_straw_calc_version());
3493 f->dump_int("allowed_bucket_algs", get_allowed_bucket_algs());
3494
3495 // be helpful about it
3496 if (has_jewel_tunables())
3497 f->dump_string("profile", "jewel");
3498 else if (has_hammer_tunables())
3499 f->dump_string("profile", "hammer");
3500 else if (has_firefly_tunables())
3501 f->dump_string("profile", "firefly");
3502 else if (has_bobtail_tunables())
3503 f->dump_string("profile", "bobtail");
3504 else if (has_argonaut_tunables())
3505 f->dump_string("profile", "argonaut");
3506 else
3507 f->dump_string("profile", "unknown");
3508 f->dump_int("optimal_tunables", (int)has_optimal_tunables());
3509 f->dump_int("legacy_tunables", (int)has_legacy_tunables());
3510
3511 // be helpful about minimum version required
3512 f->dump_string("minimum_required_version", get_min_required_version());
3513
3514 f->dump_int("require_feature_tunables", (int)has_nondefault_tunables());
3515 f->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2());
3516 f->dump_int("has_v2_rules", (int)has_v2_rules());
3517 f->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3());
3518 f->dump_int("has_v3_rules", (int)has_v3_rules());
3519 f->dump_int("has_v4_buckets", (int)has_v4_buckets());
3520 f->dump_int("require_feature_tunables5", (int)has_nondefault_tunables5());
3521 f->dump_int("has_v5_rules", (int)has_v5_rules());
3522 }
3523
3524 void CrushWrapper::dump_choose_args(Formatter *f) const
3525 {
3526 f->open_object_section("choose_args");
3527 for (auto c : choose_args) {
3528 crush_choose_arg_map arg_map = c.second;
3529 f->open_array_section(stringify(c.first).c_str());
3530 for (__u32 i = 0; i < arg_map.size; i++) {
3531 crush_choose_arg *arg = &arg_map.args[i];
3532 if (arg->weight_set_positions == 0 &&
3533 arg->ids_size == 0)
3534 continue;
3535 f->open_object_section("choose_args");
3536 int bucket_index = i;
3537 f->dump_int("bucket_id", -1-bucket_index);
3538 if (arg->weight_set_positions > 0) {
3539 f->open_array_section("weight_set");
3540 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
3541 f->open_array_section("weights");
3542 __u32 *weights = arg->weight_set[j].weights;
3543 __u32 size = arg->weight_set[j].size;
3544 for (__u32 k = 0; k < size; k++) {
3545 f->dump_float("weight", (float)weights[k]/(float)0x10000);
3546 }
3547 f->close_section();
3548 }
3549 f->close_section();
3550 }
3551 if (arg->ids_size > 0) {
3552 f->open_array_section("ids");
3553 for (__u32 j = 0; j < arg->ids_size; j++)
3554 f->dump_int("id", arg->ids[j]);
3555 f->close_section();
3556 }
3557 f->close_section();
3558 }
3559 f->close_section();
3560 }
3561 f->close_section();
3562 }
3563
3564 void CrushWrapper::dump_rules(Formatter *f) const
3565 {
3566 for (int i=0; i<get_max_rules(); i++) {
3567 if (!rule_exists(i))
3568 continue;
3569 dump_rule(i, f);
3570 }
3571 }
3572
3573 void CrushWrapper::dump_rule(int ruleset, Formatter *f) const
3574 {
3575 f->open_object_section("rule");
3576 f->dump_int("rule_id", ruleset);
3577 if (get_rule_name(ruleset))
3578 f->dump_string("rule_name", get_rule_name(ruleset));
3579 f->dump_int("ruleset", get_rule_mask_ruleset(ruleset));
3580 f->dump_int("type", get_rule_mask_type(ruleset));
3581 f->dump_int("min_size", get_rule_mask_min_size(ruleset));
3582 f->dump_int("max_size", get_rule_mask_max_size(ruleset));
3583 f->open_array_section("steps");
3584 for (int j=0; j<get_rule_len(ruleset); j++) {
3585 f->open_object_section("step");
3586 switch (get_rule_op(ruleset, j)) {
3587 case CRUSH_RULE_NOOP:
3588 f->dump_string("op", "noop");
3589 break;
3590 case CRUSH_RULE_TAKE:
3591 f->dump_string("op", "take");
3592 {
3593 int item = get_rule_arg1(ruleset, j);
3594 f->dump_int("item", item);
3595
3596 const char *name = get_item_name(item);
3597 f->dump_string("item_name", name ? name : "");
3598 }
3599 break;
3600 case CRUSH_RULE_EMIT:
3601 f->dump_string("op", "emit");
3602 break;
3603 case CRUSH_RULE_CHOOSE_FIRSTN:
3604 f->dump_string("op", "choose_firstn");
3605 f->dump_int("num", get_rule_arg1(ruleset, j));
3606 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
3607 break;
3608 case CRUSH_RULE_CHOOSE_INDEP:
3609 f->dump_string("op", "choose_indep");
3610 f->dump_int("num", get_rule_arg1(ruleset, j));
3611 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
3612 break;
3613 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
3614 f->dump_string("op", "chooseleaf_firstn");
3615 f->dump_int("num", get_rule_arg1(ruleset, j));
3616 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
3617 break;
3618 case CRUSH_RULE_CHOOSELEAF_INDEP:
3619 f->dump_string("op", "chooseleaf_indep");
3620 f->dump_int("num", get_rule_arg1(ruleset, j));
3621 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
3622 break;
3623 case CRUSH_RULE_SET_CHOOSE_TRIES:
3624 f->dump_string("op", "set_choose_tries");
3625 f->dump_int("num", get_rule_arg1(ruleset, j));
3626 break;
3627 case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
3628 f->dump_string("op", "set_chooseleaf_tries");
3629 f->dump_int("num", get_rule_arg1(ruleset, j));
3630 break;
3631 default:
3632 f->dump_int("opcode", get_rule_op(ruleset, j));
3633 f->dump_int("arg1", get_rule_arg1(ruleset, j));
3634 f->dump_int("arg2", get_rule_arg2(ruleset, j));
3635 }
3636 f->close_section();
3637 }
3638 f->close_section();
3639 f->close_section();
3640 }
3641
3642 void CrushWrapper::list_rules(Formatter *f) const
3643 {
3644 for (int rule = 0; rule < get_max_rules(); rule++) {
3645 if (!rule_exists(rule))
3646 continue;
3647 f->dump_string("name", get_rule_name(rule));
3648 }
3649 }
3650
3651 void CrushWrapper::list_rules(ostream *ss) const
3652 {
3653 for (int rule = 0; rule < get_max_rules(); rule++) {
3654 if (!rule_exists(rule))
3655 continue;
3656 *ss << get_rule_name(rule) << "\n";
3657 }
3658 }
3659
3660 class CrushTreePlainDumper : public CrushTreeDumper::Dumper<TextTable> {
3661 public:
3662 typedef CrushTreeDumper::Dumper<TextTable> Parent;
3663
3664 explicit CrushTreePlainDumper(const CrushWrapper *crush,
3665 const CrushTreeDumper::name_map_t& wsnames)
3666 : Parent(crush, wsnames) {}
3667 explicit CrushTreePlainDumper(const CrushWrapper *crush,
3668 const CrushTreeDumper::name_map_t& wsnames,
3669 bool show_shadow)
3670 : Parent(crush, wsnames, show_shadow) {}
3671
3672
3673 void dump(TextTable *tbl) {
3674 tbl->define_column("ID", TextTable::LEFT, TextTable::RIGHT);
3675 tbl->define_column("CLASS", TextTable::LEFT, TextTable::RIGHT);
3676 tbl->define_column("WEIGHT", TextTable::LEFT, TextTable::RIGHT);
3677 for (auto& p : crush->choose_args) {
3678 if (p.first == CrushWrapper::DEFAULT_CHOOSE_ARGS) {
3679 tbl->define_column("(compat)", TextTable::LEFT, TextTable::RIGHT);
3680 } else {
3681 string name;
3682 auto q = weight_set_names.find(p.first);
3683 name = q != weight_set_names.end() ? q->second :
3684 stringify(p.first);
3685 tbl->define_column(name.c_str(), TextTable::LEFT, TextTable::RIGHT);
3686 }
3687 }
3688 tbl->define_column("TYPE NAME", TextTable::LEFT, TextTable::LEFT);
3689 Parent::dump(tbl);
3690 }
3691
3692 protected:
3693 void dump_item(const CrushTreeDumper::Item &qi, TextTable *tbl) override {
3694 const char *c = crush->get_item_class(qi.id);
3695 if (!c)
3696 c = "";
3697 *tbl << qi.id
3698 << c
3699 << weightf_t(qi.weight);
3700 for (auto& p : crush->choose_args) {
3701 if (qi.parent < 0) {
3702 const crush_choose_arg_map cmap = crush->choose_args_get(p.first);
3703 int bidx = -1 - qi.parent;
3704 const crush_bucket *b = crush->get_bucket(qi.parent);
3705 if (b &&
3706 bidx < (int)cmap.size &&
3707 cmap.args[bidx].weight_set &&
3708 cmap.args[bidx].weight_set_positions >= 1) {
3709 int pos;
3710 for (pos = 0;
3711 pos < (int)cmap.args[bidx].weight_set[0].size &&
3712 b->items[pos] != qi.id;
3713 ++pos) ;
3714 *tbl << weightf_t((float)cmap.args[bidx].weight_set[0].weights[pos] /
3715 (float)0x10000);
3716 continue;
3717 }
3718 }
3719 *tbl << "";
3720 }
3721 ostringstream ss;
3722 for (int k=0; k < qi.depth; k++) {
3723 ss << " ";
3724 }
3725 if (qi.is_bucket()) {
3726 ss << crush->get_type_name(crush->get_bucket_type(qi.id)) << " "
3727 << crush->get_item_name(qi.id);
3728 } else {
3729 ss << "osd." << qi.id;
3730 }
3731 *tbl << ss.str();
3732 *tbl << TextTable::endrow;
3733 }
3734 };
3735
3736
3737 class CrushTreeFormattingDumper : public CrushTreeDumper::FormattingDumper {
3738 public:
3739 typedef CrushTreeDumper::FormattingDumper Parent;
3740
3741 explicit CrushTreeFormattingDumper(
3742 const CrushWrapper *crush,
3743 const CrushTreeDumper::name_map_t& wsnames)
3744 : Parent(crush, wsnames) {}
3745
3746 explicit CrushTreeFormattingDumper(
3747 const CrushWrapper *crush,
3748 const CrushTreeDumper::name_map_t& wsnames,
3749 bool show_shadow)
3750 : Parent(crush, wsnames, show_shadow) {}
3751
3752 void dump(Formatter *f) {
3753 f->open_array_section("nodes");
3754 Parent::dump(f);
3755 f->close_section();
3756
3757 // There is no stray bucket whose id is a negative number, so just get
3758 // the max_id and iterate from 0 to max_id to dump stray osds.
3759 f->open_array_section("stray");
3760 int32_t max_id = -1;
3761 if (!crush->name_map.empty()) {
3762 max_id = crush->name_map.rbegin()->first;
3763 }
3764 for (int32_t i = 0; i <= max_id; i++) {
3765 if (crush->item_exists(i) && !is_touched(i) && should_dump(i)) {
3766 dump_item(CrushTreeDumper::Item(i, 0, 0, 0), f);
3767 }
3768 }
3769 f->close_section();
3770 }
3771 };
3772
3773
3774 void CrushWrapper::dump_tree(
3775 ostream *out,
3776 Formatter *f,
3777 const CrushTreeDumper::name_map_t& weight_set_names,
3778 bool show_shadow) const
3779 {
3780 if (out) {
3781 TextTable tbl;
3782 CrushTreePlainDumper(this, weight_set_names, show_shadow).dump(&tbl);
3783 *out << tbl;
3784 }
3785 if (f) {
3786 CrushTreeFormattingDumper(this, weight_set_names, show_shadow).dump(f);
3787 }
3788 }
3789
3790 void CrushWrapper::generate_test_instances(list<CrushWrapper*>& o)
3791 {
3792 o.push_back(new CrushWrapper);
3793 // fixme
3794 }
3795
3796 /**
3797 * Determine the default CRUSH ruleset ID to be used with
3798 * newly created replicated pools.
3799 *
3800 * @returns a ruleset ID (>=0) or -1 if no suitable ruleset found
3801 */
3802 int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext *cct)
3803 {
3804 int crush_ruleset = cct->_conf.get_val<int64_t>("osd_pool_default_crush_rule");
3805 if (crush_ruleset < 0) {
3806 crush_ruleset = find_first_ruleset(pg_pool_t::TYPE_REPLICATED);
3807 } else if (!ruleset_exists(crush_ruleset)) {
3808 crush_ruleset = -1; // match find_first_ruleset() retval
3809 }
3810 return crush_ruleset;
3811 }
3812
// A valid crush name is non-empty and made up only of ASCII letters,
// digits, '-', '_' and '.'.
bool CrushWrapper::is_valid_crush_name(const string& s)
{
  auto allowed = [](char ch) {
    if (ch == '-' || ch == '_' || ch == '.')
      return true;
    if (ch >= '0' && ch <= '9')
      return true;
    if (ch >= 'A' && ch <= 'Z')
      return true;
    return ch >= 'a' && ch <= 'z';
  };

  if (s.empty()) {
    return false;
  }
  for (const char ch : s) {
    if (!allowed(ch)) {
      return false;
    }
  }
  return true;
}
3828
3829 bool CrushWrapper::is_valid_crush_loc(CephContext *cct,
3830 const map<string,string>& loc)
3831 {
3832 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
3833 if (!is_valid_crush_name(l->first) ||
3834 !is_valid_crush_name(l->second)) {
3835 ldout(cct, 1) << "loc["
3836 << l->first << "] = '"
3837 << l->second << "' not a valid crush name ([A-Za-z0-9_-.]+)"
3838 << dendl;
3839 return false;
3840 }
3841 }
3842 return true;
3843 }
3844
3845 int CrushWrapper::_choose_type_stack(
3846 CephContext *cct,
3847 const vector<pair<int,int>>& stack,
3848 const set<int>& overfull,
3849 const vector<int>& underfull,
3850 const vector<int>& more_underfull,
3851 const vector<int>& orig,
3852 vector<int>::const_iterator& i,
3853 set<int>& used,
3854 vector<int> *pw,
3855 int root_bucket,
3856 int rule) const
3857 {
3858 vector<int> w = *pw;
3859 vector<int> o;
3860
3861 ldout(cct, 10) << __func__ << " stack " << stack
3862 << " orig " << orig
3863 << " at " << *i
3864 << " pw " << *pw
3865 << dendl;
3866 ceph_assert(root_bucket < 0);
3867 vector<int> cumulative_fanout(stack.size());
3868 int f = 1;
3869 for (int j = (int)stack.size() - 1; j >= 0; --j) {
3870 cumulative_fanout[j] = f;
3871 f *= stack[j].second;
3872 }
3873 ldout(cct, 10) << __func__ << " cumulative_fanout " << cumulative_fanout
3874 << dendl;
3875
3876 // identify underfull targets for each intermediate level.
3877 // this serves two purposes:
3878 // 1. we can tell when we are selecting a bucket that does not have any underfull
3879 // devices beneath it. that means that if the current input includes an overfull
3880 // device, we won't be able to find an underfull device with this parent to
3881 // swap for it.
3882 // 2. when we decide we should reject a bucket due to the above, this list gives us
3883 // a list of peers to consider that *do* have underfull devices available.. (we
3884 // are careful to pick one that has the same parent.)
3885 vector<set<int>> underfull_buckets; // level -> set of buckets with >0 underfull item(s)
3886 underfull_buckets.resize(stack.size() - 1);
3887 for (auto osd : underfull) {
3888 int item = osd;
3889 for (int j = (int)stack.size() - 2; j >= 0; --j) {
3890 int type = stack[j].first;
3891 item = get_parent_of_type(item, type, rule);
3892 ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type
3893 << " is " << item << dendl;
3894 if (!subtree_contains(root_bucket, item)) {
3895 ldout(cct, 20) << __func__ << " not in root subtree " << root_bucket << dendl;
3896 continue;
3897 }
3898 underfull_buckets[j].insert(item);
3899 }
3900 }
3901 ldout(cct, 20) << __func__ << " underfull_buckets " << underfull_buckets << dendl;
3902
3903 for (unsigned j = 0; j < stack.size(); ++j) {
3904 int type = stack[j].first;
3905 int fanout = stack[j].second;
3906 int cum_fanout = cumulative_fanout[j];
3907 ldout(cct, 10) << " level " << j << ": type " << type << " fanout " << fanout
3908 << " cumulative " << cum_fanout
3909 << " w " << w << dendl;
3910 vector<int> o;
3911 auto tmpi = i;
3912 if (i == orig.end()) {
3913 ldout(cct, 10) << __func__ << " end of orig, break 0" << dendl;
3914 break;
3915 }
3916 for (auto from : w) {
3917 ldout(cct, 10) << " from " << from << dendl;
3918 // identify leaves under each choice. we use this to check whether any of these
3919 // leaves are overfull. (if so, we need to make sure there are underfull candidates
3920 // to swap for them.)
3921 vector<set<int>> leaves;
3922 leaves.resize(fanout);
3923 for (int pos = 0; pos < fanout; ++pos) {
3924 if (type > 0) {
3925 // non-leaf
3926 int item = get_parent_of_type(*tmpi, type, rule);
3927 o.push_back(item);
3928 int n = cum_fanout;
3929 while (n-- && tmpi != orig.end()) {
3930 leaves[pos].insert(*tmpi++);
3931 }
3932 ldout(cct, 10) << __func__ << " from " << *tmpi << " got " << item
3933 << " of type " << type << " over leaves " << leaves[pos] << dendl;
3934 } else {
3935 // leaf
3936 bool replaced = false;
3937 if (overfull.count(*i)) {
3938 for (auto item : underfull) {
3939 ldout(cct, 10) << __func__ << " pos " << pos
3940 << " was " << *i << " considering " << item
3941 << dendl;
3942 if (used.count(item)) {
3943 ldout(cct, 20) << __func__ << " in used " << used << dendl;
3944 continue;
3945 }
3946 if (!subtree_contains(from, item)) {
3947 ldout(cct, 20) << __func__ << " not in subtree " << from << dendl;
3948 continue;
3949 }
3950 if (std::find(orig.begin(), orig.end(), item) != orig.end()) {
3951 ldout(cct, 20) << __func__ << " in orig " << orig << dendl;
3952 continue;
3953 }
3954 o.push_back(item);
3955 used.insert(item);
3956 ldout(cct, 10) << __func__ << " pos " << pos << " replace "
3957 << *i << " -> " << item << dendl;
3958 replaced = true;
3959 ceph_assert(i != orig.end());
3960 ++i;
3961 break;
3962 }
3963 if (!replaced) {
3964 for (auto item : more_underfull) {
3965 ldout(cct, 10) << __func__ << " more underfull pos " << pos
3966 << " was " << *i << " considering " << item
3967 << dendl;
3968 if (used.count(item)) {
3969 ldout(cct, 20) << __func__ << " in used " << used << dendl;
3970 continue;
3971 }
3972 if (!subtree_contains(from, item)) {
3973 ldout(cct, 20) << __func__ << " not in subtree " << from << dendl;
3974 continue;
3975 }
3976 if (std::find(orig.begin(), orig.end(), item) != orig.end()) {
3977 ldout(cct, 20) << __func__ << " in orig " << orig << dendl;
3978 continue;
3979 }
3980 o.push_back(item);
3981 used.insert(item);
3982 ldout(cct, 10) << __func__ << " pos " << pos << " replace "
3983 << *i << " -> " << item << dendl;
3984 replaced = true;
3985 assert(i != orig.end());
3986 ++i;
3987 break;
3988 }
3989 }
3990 }
3991 if (!replaced) {
3992 ldout(cct, 10) << __func__ << " pos " << pos << " keep " << *i
3993 << dendl;
3994 ceph_assert(i != orig.end());
3995 o.push_back(*i);
3996 ++i;
3997 }
3998 if (i == orig.end()) {
3999 ldout(cct, 10) << __func__ << " end of orig, break 1" << dendl;
4000 break;
4001 }
4002 }
4003 }
4004 if (j + 1 < stack.size()) {
4005 // check if any buckets have overfull leaves but no underfull candidates
4006 for (int pos = 0; pos < fanout; ++pos) {
4007 if (underfull_buckets[j].count(o[pos]) == 0) {
4008 // are any leaves overfull?
4009 bool any_overfull = false;
4010 for (auto osd : leaves[pos]) {
4011 if (overfull.count(osd)) {
4012 any_overfull = true;
4013 break;
4014 }
4015 }
4016 if (any_overfull) {
4017 ldout(cct, 10) << " bucket " << o[pos] << " has no underfull targets and "
4018 << ">0 leaves " << leaves[pos] << " is overfull; alts "
4019 << underfull_buckets[j]
4020 << dendl;
4021 for (auto alt : underfull_buckets[j]) {
4022 if (std::find(o.begin(), o.end(), alt) == o.end()) {
4023 // see if alt has the same parent
4024 if (j == 0 ||
4025 get_parent_of_type(o[pos], stack[j-1].first, rule) ==
4026 get_parent_of_type(alt, stack[j-1].first, rule)) {
4027 if (j)
4028 ldout(cct, 10) << " replacing " << o[pos]
4029 << " (which has no underfull leaves) with " << alt
4030 << " (same parent "
4031 << get_parent_of_type(alt, stack[j-1].first, rule) << " type "
4032 << type << ")" << dendl;
4033 else
4034 ldout(cct, 10) << " replacing " << o[pos]
4035 << " (which has no underfull leaves) with " << alt
4036 << " (first level)" << dendl;
4037 o[pos] = alt;
4038 break;
4039 } else {
4040 ldout(cct, 30) << " alt " << alt << " for " << o[pos]
4041 << " has different parent, skipping" << dendl;
4042 }
4043 }
4044 }
4045 }
4046 }
4047 }
4048 }
4049 if (i == orig.end()) {
4050 ldout(cct, 10) << __func__ << " end of orig, break 2" << dendl;
4051 break;
4052 }
4053 }
4054 ldout(cct, 10) << __func__ << " w <- " << o << " was " << w << dendl;
4055 w.swap(o);
4056 }
4057 *pw = w;
4058 return 0;
4059 }
4060
4061 int CrushWrapper::try_remap_rule(
4062 CephContext *cct,
4063 int ruleno,
4064 int maxout,
4065 const set<int>& overfull,
4066 const vector<int>& underfull,
4067 const vector<int>& more_underfull,
4068 const vector<int>& orig,
4069 vector<int> *out) const
4070 {
4071 const crush_map *map = crush;
4072 const crush_rule *rule = get_rule(ruleno);
4073 ceph_assert(rule);
4074
4075 ldout(cct, 10) << __func__ << " ruleno " << ruleno
4076 << " numrep " << maxout << " overfull " << overfull
4077 << " underfull " << underfull
4078 << " more_underfull " << more_underfull
4079 << " orig " << orig
4080 << dendl;
4081 vector<int> w; // working set
4082 out->clear();
4083
4084 auto i = orig.begin();
4085 set<int> used;
4086
4087 vector<pair<int,int>> type_stack; // (type, fan-out)
4088 int root_bucket = 0;
4089 for (unsigned step = 0; step < rule->len; ++step) {
4090 const crush_rule_step *curstep = &rule->steps[step];
4091 ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl;
4092 switch (curstep->op) {
4093 case CRUSH_RULE_TAKE:
4094 if ((curstep->arg1 >= 0 && curstep->arg1 < map->max_devices) ||
4095 (-1-curstep->arg1 >= 0 && -1-curstep->arg1 < map->max_buckets &&
4096 map->buckets[-1-curstep->arg1])) {
4097 w.clear();
4098 w.push_back(curstep->arg1);
4099 root_bucket = curstep->arg1;
4100 ldout(cct, 10) << __func__ << " take " << w << dendl;
4101 } else {
4102 ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl;
4103 }
4104 break;
4105
4106 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
4107 case CRUSH_RULE_CHOOSELEAF_INDEP:
4108 {
4109 int numrep = curstep->arg1;
4110 int type = curstep->arg2;
4111 if (numrep <= 0)
4112 numrep += maxout;
4113 type_stack.push_back(make_pair(type, numrep));
4114 if (type > 0)
4115 type_stack.push_back(make_pair(0, 1));
4116 int r = _choose_type_stack(cct, type_stack, overfull, underfull, more_underfull, orig,
4117 i, used, &w, root_bucket, ruleno);
4118 if (r < 0)
4119 return r;
4120 type_stack.clear();
4121 }
4122 break;
4123
4124 case CRUSH_RULE_CHOOSE_FIRSTN:
4125 case CRUSH_RULE_CHOOSE_INDEP:
4126 {
4127 int numrep = curstep->arg1;
4128 int type = curstep->arg2;
4129 if (numrep <= 0)
4130 numrep += maxout;
4131 type_stack.push_back(make_pair(type, numrep));
4132 }
4133 break;
4134
4135 case CRUSH_RULE_EMIT:
4136 ldout(cct, 10) << " emit " << w << dendl;
4137 if (!type_stack.empty()) {
4138 int r = _choose_type_stack(cct, type_stack, overfull, underfull, more_underfull, orig,
4139 i, used, &w, root_bucket, ruleno);
4140 if (r < 0)
4141 return r;
4142 type_stack.clear();
4143 }
4144 for (auto item : w) {
4145 out->push_back(item);
4146 }
4147 w.clear();
4148 break;
4149
4150 default:
4151 // ignore
4152 break;
4153 }
4154 }
4155
4156 return 0;
4157 }
4158
4159
4160 int CrushWrapper::_choose_args_adjust_item_weight_in_bucket(
4161 CephContext *cct,
4162 crush_choose_arg_map cmap,
4163 int bucketid,
4164 int id,
4165 const vector<int>& weight,
4166 ostream *ss)
4167 {
4168 int changed = 0;
4169 int bidx = -1 - bucketid;
4170 crush_bucket *b = crush->buckets[bidx];
4171 if (bidx >= (int)cmap.size) {
4172 if (ss)
4173 *ss << "no weight-set for bucket " << b->id;
4174 ldout(cct, 10) << __func__ << " no crush_choose_arg for bucket " << b->id
4175 << dendl;
4176 return 0;
4177 }
4178 crush_choose_arg *carg = &cmap.args[bidx];
4179 if (carg->weight_set == NULL) {
4180 // create a weight-set for this bucket and populate it with the
4181 // bucket weights
4182 unsigned positions = get_choose_args_positions(cmap);
4183 carg->weight_set_positions = positions;
4184 carg->weight_set = static_cast<crush_weight_set*>(
4185 calloc(sizeof(crush_weight_set), positions));
4186 for (unsigned p = 0; p < positions; ++p) {
4187 carg->weight_set[p].size = b->size;
4188 carg->weight_set[p].weights = (__u32*)calloc(b->size, sizeof(__u32));
4189 for (unsigned i = 0; i < b->size; ++i) {
4190 carg->weight_set[p].weights[i] = crush_get_bucket_item_weight(b, i);
4191 }
4192 }
4193 changed++;
4194 }
4195 if (carg->weight_set_positions != weight.size()) {
4196 if (ss)
4197 *ss << "weight_set_positions != " << weight.size() << " for bucket " << b->id;
4198 ldout(cct, 10) << __func__ << " weight_set_positions != " << weight.size()
4199 << " for bucket " << b->id << dendl;
4200 return 0;
4201 }
4202 for (unsigned i = 0; i < b->size; i++) {
4203 if (b->items[i] == id) {
4204 for (unsigned j = 0; j < weight.size(); ++j) {
4205 carg->weight_set[j].weights[i] = weight[j];
4206 }
4207 ldout(cct, 5) << __func__ << " set " << id << " to " << weight
4208 << " in bucket " << b->id << dendl;
4209 changed++;
4210 }
4211 }
4212 if (changed) {
4213 vector<int> bucket_weight(weight.size(), 0);
4214 for (unsigned i = 0; i < b->size; i++) {
4215 for (unsigned j = 0; j < weight.size(); ++j) {
4216 bucket_weight[j] += carg->weight_set[j].weights[i];
4217 }
4218 }
4219 choose_args_adjust_item_weight(cct, cmap, b->id, bucket_weight, nullptr);
4220 }
4221 return changed;
4222 }
4223
4224 int CrushWrapper::choose_args_adjust_item_weight(
4225 CephContext *cct,
4226 crush_choose_arg_map cmap,
4227 int id,
4228 const vector<int>& weight,
4229 ostream *ss)
4230 {
4231 ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl;
4232 int changed = 0;
4233 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
4234 crush_bucket *b = crush->buckets[bidx];
4235 if (b == nullptr) {
4236 continue;
4237 }
4238 changed += _choose_args_adjust_item_weight_in_bucket(
4239 cct, cmap, b->id, id, weight, ss);
4240 }
4241 if (!changed) {
4242 if (ss)
4243 *ss << "item " << id << " not found in crush map";
4244 return -ENOENT;
4245 }
4246 return changed;
4247 }