]> git.proxmox.com Git - ceph.git/blame - ceph/src/crush/CrushWrapper.cc
update sources to v12.1.1
[ceph.git] / ceph / src / crush / CrushWrapper.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "osd/osd_types.h"
5#include "common/debug.h"
6#include "common/Formatter.h"
7#include "common/errno.h"
8#include "include/stringify.h"
9
10#include "CrushWrapper.h"
11#include "CrushTreeDumper.h"
12
13#define dout_subsys ceph_subsys_crush
14
31f18b77
FG
15bool CrushWrapper::has_legacy_rulesets() const
16{
17 for (unsigned i=0; i<crush->max_rules; i++) {
18 crush_rule *r = crush->rules[i];
19 if (r &&
20 r->mask.ruleset != i) {
21 return true;
22 }
23 }
24 return false;
25}
26
27int CrushWrapper::renumber_rules_by_ruleset()
28{
29 int max_ruleset = 0;
30 for (unsigned i=0; i<crush->max_rules; i++) {
31 crush_rule *r = crush->rules[i];
32 if (r && r->mask.ruleset >= max_ruleset) {
33 max_ruleset = r->mask.ruleset + 1;
34 }
35 }
36 struct crush_rule **newrules =
37 (crush_rule**)calloc(1, max_ruleset * sizeof(crush_rule*));
38 for (unsigned i=0; i<crush->max_rules; i++) {
39 crush_rule *r = crush->rules[i];
40 if (!r)
41 continue;
42 if (newrules[r->mask.ruleset]) {
43 // collision, we can't do it.
44 free(newrules);
45 return -EINVAL;
46 }
47 newrules[r->mask.ruleset] = r;
48 }
49
50 // success, swap!
51 free(crush->rules);
52 crush->rules = newrules;
53 crush->max_rules = max_ruleset;
54 return 0;
55}
56
57bool CrushWrapper::has_multirule_rulesets() const
58{
59 for (unsigned i=0; i<crush->max_rules; i++) {
60 crush_rule *r = crush->rules[i];
61 if (!r)
62 continue;
63 for (unsigned j=i+1; j<crush->max_rules; j++) {
64 crush_rule *s = crush->rules[j];
65 if (!s)
66 continue;
67 if (r->mask.ruleset == s->mask.ruleset)
68 return true;
69 }
70 }
71 return false;
72}
73
7c673cae
FG
74bool CrushWrapper::has_v2_rules() const
75{
76 for (unsigned i=0; i<crush->max_rules; i++) {
77 if (is_v2_rule(i)) {
78 return true;
79 }
80 }
81 return false;
82}
83
84bool CrushWrapper::is_v2_rule(unsigned ruleid) const
85{
86 // check rule for use of indep or new SET_* rule steps
87 if (ruleid >= crush->max_rules)
88 return false;
89 crush_rule *r = crush->rules[ruleid];
90 if (!r)
91 return false;
92 for (unsigned j=0; j<r->len; j++) {
93 if (r->steps[j].op == CRUSH_RULE_CHOOSE_INDEP ||
94 r->steps[j].op == CRUSH_RULE_CHOOSELEAF_INDEP ||
95 r->steps[j].op == CRUSH_RULE_SET_CHOOSE_TRIES ||
96 r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_TRIES) {
97 return true;
98 }
99 }
100 return false;
101}
102
103bool CrushWrapper::has_v3_rules() const
104{
105 for (unsigned i=0; i<crush->max_rules; i++) {
106 if (is_v3_rule(i)) {
107 return true;
108 }
109 }
110 return false;
111}
112
113bool CrushWrapper::is_v3_rule(unsigned ruleid) const
114{
115 // check rule for use of SET_CHOOSELEAF_VARY_R step
116 if (ruleid >= crush->max_rules)
117 return false;
118 crush_rule *r = crush->rules[ruleid];
119 if (!r)
120 return false;
121 for (unsigned j=0; j<r->len; j++) {
122 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_VARY_R) {
123 return true;
124 }
125 }
126 return false;
127}
128
129bool CrushWrapper::has_v4_buckets() const
130{
131 for (int i=0; i<crush->max_buckets; ++i) {
132 crush_bucket *b = crush->buckets[i];
133 if (!b)
134 continue;
135 if (b->alg == CRUSH_BUCKET_STRAW2)
136 return true;
137 }
138 return false;
139}
140
141bool CrushWrapper::has_v5_rules() const
142{
143 for (unsigned i=0; i<crush->max_rules; i++) {
144 if (is_v5_rule(i)) {
145 return true;
146 }
147 }
148 return false;
149}
150
151bool CrushWrapper::is_v5_rule(unsigned ruleid) const
152{
153 // check rule for use of SET_CHOOSELEAF_STABLE step
154 if (ruleid >= crush->max_rules)
155 return false;
156 crush_rule *r = crush->rules[ruleid];
157 if (!r)
158 return false;
159 for (unsigned j=0; j<r->len; j++) {
160 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_STABLE) {
161 return true;
162 }
163 }
164 return false;
165}
166
31f18b77 167bool CrushWrapper::has_choose_args() const
7c673cae
FG
168{
169 return !choose_args.empty();
170}
171
31f18b77 172bool CrushWrapper::has_incompat_choose_args() const
7c673cae 173{
31f18b77
FG
174 if (choose_args.empty())
175 return false;
176 if (choose_args.size() > 1)
177 return true;
178 crush_choose_arg_map arg_map = choose_args.begin()->second;
179 for (__u32 i = 0; i < arg_map.size; i++) {
180 crush_choose_arg *arg = &arg_map.args[i];
181 if (arg->weight_set_size == 0 &&
182 arg->ids_size == 0)
183 continue;
184 if (arg->weight_set_size != 1)
185 return true;
186 if (arg->ids_size != 0)
187 return true;
188 }
189 return false;
7c673cae
FG
190}
191
192int CrushWrapper::split_id_class(int i, int *idout, int *classout) const
193{
194 if (!item_exists(i))
195 return -EINVAL;
196 string name = get_item_name(i);
197 size_t pos = name.find("~");
198 if (pos == string::npos) {
199 *idout = i;
200 *classout = -1;
201 return 0;
202 }
203 string name_no_class = name.substr(0, pos);
204 if (!name_exists(name_no_class))
205 return -ENOENT;
206 string class_name = name.substr(pos + 1);
207 if (!class_exists(class_name))
208 return -ENOENT;
209 *idout = get_item_id(name_no_class);
210 *classout = get_class_id(class_name);
211 return 0;
212}
213
214int CrushWrapper::can_rename_item(const string& srcname,
215 const string& dstname,
216 ostream *ss) const
217{
218 if (name_exists(srcname)) {
219 if (name_exists(dstname)) {
220 *ss << "dstname = '" << dstname << "' already exists";
221 return -EEXIST;
222 }
223 if (is_valid_crush_name(dstname)) {
224 return 0;
225 } else {
226 *ss << "dstname = '" << dstname << "' does not match [-_.0-9a-zA-Z]+";
227 return -EINVAL;
228 }
229 } else {
230 if (name_exists(dstname)) {
231 *ss << "srcname = '" << srcname << "' does not exist "
232 << "and dstname = '" << dstname << "' already exists";
233 return -EALREADY;
234 } else {
235 *ss << "srcname = '" << srcname << "' does not exist";
236 return -ENOENT;
237 }
238 }
239}
240
241int CrushWrapper::rename_item(const string& srcname,
242 const string& dstname,
243 ostream *ss)
244{
245 int ret = can_rename_item(srcname, dstname, ss);
246 if (ret < 0)
247 return ret;
248 int oldid = get_item_id(srcname);
249 return set_item_name(oldid, dstname);
250}
251
252int CrushWrapper::can_rename_bucket(const string& srcname,
253 const string& dstname,
254 ostream *ss) const
255{
256 int ret = can_rename_item(srcname, dstname, ss);
257 if (ret)
258 return ret;
259 int srcid = get_item_id(srcname);
260 if (srcid >= 0) {
261 *ss << "srcname = '" << srcname << "' is not a bucket "
262 << "because its id = " << srcid << " is >= 0";
263 return -ENOTDIR;
264 }
265 return 0;
266}
267
268int CrushWrapper::rename_bucket(const string& srcname,
269 const string& dstname,
270 ostream *ss)
271{
272 int ret = can_rename_bucket(srcname, dstname, ss);
273 if (ret < 0)
274 return ret;
275 int oldid = get_item_id(srcname);
276 return set_item_name(oldid, dstname);
277}
278
279void CrushWrapper::find_takes(set<int>& roots) const
280{
281 for (unsigned i=0; i<crush->max_rules; i++) {
282 crush_rule *r = crush->rules[i];
283 if (!r)
284 continue;
285 for (unsigned j=0; j<r->len; j++) {
286 if (r->steps[j].op == CRUSH_RULE_TAKE)
287 roots.insert(r->steps[j].arg1);
288 }
289 }
290}
291
292void CrushWrapper::find_roots(set<int>& roots) const
293{
294 for (int i = 0; i < crush->max_buckets; i++) {
295 if (!crush->buckets[i])
296 continue;
297 crush_bucket *b = crush->buckets[i];
298 if (!_search_item_exists(b->id))
299 roots.insert(b->id);
300 }
301}
302
224ce89b
WB
303void CrushWrapper::find_nonshadow_roots(set<int>& roots) const
304{
305 for (int i = 0; i < crush->max_buckets; i++) {
306 if (!crush->buckets[i])
307 continue;
308 crush_bucket *b = crush->buckets[i];
309 if (_search_item_exists(b->id))
310 continue;
311 const char *name = get_item_name(b->id);
312 if (name && !is_valid_crush_name(name))
313 continue;
314 roots.insert(b->id);
315 }
316}
317
7c673cae
FG
318bool CrushWrapper::subtree_contains(int root, int item) const
319{
320 if (root == item)
321 return true;
322
323 if (root >= 0)
324 return false; // root is a leaf
325
326 const crush_bucket *b = get_bucket(root);
327 if (IS_ERR(b))
328 return false;
329
330 for (unsigned j=0; j<b->size; j++) {
331 if (subtree_contains(b->items[j], item))
332 return true;
333 }
334 return false;
335}
336
337bool CrushWrapper::_maybe_remove_last_instance(CephContext *cct, int item, bool unlink_only)
338{
339 // last instance?
340 if (_search_item_exists(item)) {
341 return false;
342 }
343 if (item < 0 && _bucket_is_in_use(item)) {
344 return false;
345 }
346
347 if (item < 0 && !unlink_only) {
348 crush_bucket *t = get_bucket(item);
349 ldout(cct, 5) << "_maybe_remove_last_instance removing bucket " << item << dendl;
350 crush_remove_bucket(crush, t);
351 if (class_bucket.count(item) != 0)
352 class_bucket.erase(item);
224ce89b 353 class_remove_item(item);
7c673cae
FG
354 }
355 if ((item >= 0 || !unlink_only) && name_map.count(item)) {
356 ldout(cct, 5) << "_maybe_remove_last_instance removing name for item " << item << dendl;
357 name_map.erase(item);
358 have_rmaps = false;
224ce89b 359 class_remove_item(item);
7c673cae
FG
360 }
361 return true;
362}
363
364int CrushWrapper::remove_root(int item, bool unused)
365{
366 if (unused && _bucket_is_in_use(item))
367 return 0;
368
369 crush_bucket *b = get_bucket(item);
370 if (IS_ERR(b))
371 return -ENOENT;
372
373 for (unsigned n = 0; n < b->size; n++) {
374 if (b->items[n] >= 0)
375 continue;
376 int r = remove_root(b->items[n], unused);
377 if (r < 0)
378 return r;
379 }
380
381 crush_remove_bucket(crush, b);
382 if (name_map.count(item) != 0) {
383 name_map.erase(item);
384 have_rmaps = false;
385 }
386 if (class_bucket.count(item) != 0)
387 class_bucket.erase(item);
388 return 0;
389}
390
391int CrushWrapper::remove_item(CephContext *cct, int item, bool unlink_only)
392{
7c673cae
FG
393 ldout(cct, 5) << "remove_item " << item << (unlink_only ? " unlink_only":"") << dendl;
394
395 int ret = -ENOENT;
396
397 if (item < 0 && !unlink_only) {
398 crush_bucket *t = get_bucket(item);
399 if (IS_ERR(t)) {
400 ldout(cct, 1) << "remove_item bucket " << item << " does not exist" << dendl;
401 return -ENOENT;
402 }
403
404 if (t->size) {
405 ldout(cct, 1) << "remove_item bucket " << item << " has " << t->size
406 << " items, not empty" << dendl;
407 return -ENOTEMPTY;
408 }
409 if (_bucket_is_in_use(item)) {
410 return -EBUSY;
411 }
412 }
413
414 for (int i = 0; i < crush->max_buckets; i++) {
415 if (!crush->buckets[i])
416 continue;
417 crush_bucket *b = crush->buckets[i];
418
419 for (unsigned i=0; i<b->size; ++i) {
420 int id = b->items[i];
421 if (id == item) {
422 ldout(cct, 5) << "remove_item removing item " << item
423 << " from bucket " << b->id << dendl;
31f18b77 424 bucket_remove_item(b, item);
7c673cae
FG
425 adjust_item_weight(cct, b->id, b->weight);
426 ret = 0;
427 }
428 }
429 }
430
431 if (_maybe_remove_last_instance(cct, item, unlink_only))
432 ret = 0;
433
434 return ret;
435}
436
437bool CrushWrapper::_search_item_exists(int item) const
438{
439 for (int i = 0; i < crush->max_buckets; i++) {
440 if (!crush->buckets[i])
441 continue;
442 crush_bucket *b = crush->buckets[i];
443 for (unsigned j=0; j<b->size; ++j) {
444 if (b->items[j] == item)
445 return true;
446 }
447 }
448 return false;
449}
450
451bool CrushWrapper::_bucket_is_in_use(int item)
452{
453 for (auto &i : class_bucket)
454 for (auto &j : i.second)
455 if (j.second == item)
456 return true;
457 for (unsigned i = 0; i < crush->max_rules; ++i) {
458 crush_rule *r = crush->rules[i];
459 if (!r)
460 continue;
461 for (unsigned j = 0; j < r->len; ++j) {
462 if (r->steps[j].op == CRUSH_RULE_TAKE) {
463 int step_item = r->steps[j].arg1;
464 int original_item;
465 int c;
466 int res = split_id_class(step_item, &original_item, &c);
467 if (res < 0)
468 return false;
469 if (step_item == item || original_item == item)
470 return true;
471 }
472 }
473 }
474 return false;
475}
476
477int CrushWrapper::_remove_item_under(CephContext *cct, int item, int ancestor, bool unlink_only)
478{
479 ldout(cct, 5) << "_remove_item_under " << item << " under " << ancestor
480 << (unlink_only ? " unlink_only":"") << dendl;
481
482 if (ancestor >= 0) {
483 return -EINVAL;
484 }
485
486 if (!bucket_exists(ancestor))
487 return -EINVAL;
488
489 int ret = -ENOENT;
490
491 crush_bucket *b = get_bucket(ancestor);
492 for (unsigned i=0; i<b->size; ++i) {
493 int id = b->items[i];
494 if (id == item) {
495 ldout(cct, 5) << "_remove_item_under removing item " << item << " from bucket " << b->id << dendl;
31f18b77 496 bucket_remove_item(b, item);
7c673cae
FG
497 adjust_item_weight(cct, b->id, b->weight);
498 ret = 0;
499 } else if (id < 0) {
500 int r = remove_item_under(cct, item, id, unlink_only);
501 if (r == 0)
502 ret = 0;
503 }
504 }
505 return ret;
506}
507
508int CrushWrapper::remove_item_under(CephContext *cct, int item, int ancestor, bool unlink_only)
509{
510 ldout(cct, 5) << "remove_item_under " << item << " under " << ancestor
511 << (unlink_only ? " unlink_only":"") << dendl;
512
513 if (!unlink_only && _bucket_is_in_use(item)) {
514 return -EBUSY;
515 }
516
517 int ret = _remove_item_under(cct, item, ancestor, unlink_only);
518 if (ret < 0)
519 return ret;
520
521 if (item < 0 && !unlink_only) {
522 crush_bucket *t = get_bucket(item);
523 if (IS_ERR(t)) {
524 ldout(cct, 1) << "remove_item_under bucket " << item
525 << " does not exist" << dendl;
526 return -ENOENT;
527 }
528
529 if (t->size) {
530 ldout(cct, 1) << "remove_item_under bucket " << item << " has " << t->size
531 << " items, not empty" << dendl;
532 return -ENOTEMPTY;
533 }
534 }
535
536 if (_maybe_remove_last_instance(cct, item, unlink_only))
537 ret = 0;
538
539 return ret;
540}
541
542int CrushWrapper::get_common_ancestor_distance(CephContext *cct, int id,
543 const std::multimap<string,string>& loc)
544{
545 ldout(cct, 5) << __func__ << " " << id << " " << loc << dendl;
546 if (!item_exists(id))
547 return -ENOENT;
548 map<string,string> id_loc = get_full_location(id);
549 ldout(cct, 20) << " id is at " << id_loc << dendl;
550
551 for (map<int,string>::const_iterator p = type_map.begin();
552 p != type_map.end();
553 ++p) {
554 map<string,string>::iterator ip = id_loc.find(p->second);
555 if (ip == id_loc.end())
556 continue;
557 for (std::multimap<string,string>::const_iterator q = loc.find(p->second);
558 q != loc.end();
559 ++q) {
560 if (q->first != p->second)
561 break;
562 if (q->second == ip->second)
563 return p->first;
564 }
565 }
566 return -ERANGE;
567}
568
569int CrushWrapper::parse_loc_map(const std::vector<string>& args,
570 std::map<string,string> *ploc)
571{
572 ploc->clear();
573 for (unsigned i = 0; i < args.size(); ++i) {
574 const char *s = args[i].c_str();
575 const char *pos = strchr(s, '=');
576 if (!pos)
577 return -EINVAL;
578 string key(s, 0, pos-s);
579 string value(pos+1);
580 if (value.length())
581 (*ploc)[key] = value;
582 else
583 return -EINVAL;
584 }
585 return 0;
586}
587
588int CrushWrapper::parse_loc_multimap(const std::vector<string>& args,
589 std::multimap<string,string> *ploc)
590{
591 ploc->clear();
592 for (unsigned i = 0; i < args.size(); ++i) {
593 const char *s = args[i].c_str();
594 const char *pos = strchr(s, '=');
595 if (!pos)
596 return -EINVAL;
597 string key(s, 0, pos-s);
598 string value(pos+1);
599 if (value.length())
600 ploc->insert(make_pair(key, value));
601 else
602 return -EINVAL;
603 }
604 return 0;
605}
606
607bool CrushWrapper::check_item_loc(CephContext *cct, int item, const map<string,string>& loc,
608 int *weight)
609{
610 ldout(cct, 5) << "check_item_loc item " << item << " loc " << loc << dendl;
611
612 for (map<int,string>::const_iterator p = type_map.begin(); p != type_map.end(); ++p) {
613 // ignore device
614 if (p->first == 0)
615 continue;
616
617 // ignore types that aren't specified in loc
618 map<string,string>::const_iterator q = loc.find(p->second);
619 if (q == loc.end()) {
620 ldout(cct, 2) << "warning: did not specify location for '" << p->second << "' level (levels are "
621 << type_map << ")" << dendl;
622 continue;
623 }
624
625 if (!name_exists(q->second)) {
626 ldout(cct, 5) << "check_item_loc bucket " << q->second << " dne" << dendl;
627 return false;
628 }
629
630 int id = get_item_id(q->second);
631 if (id >= 0) {
632 ldout(cct, 5) << "check_item_loc requested " << q->second << " for type " << p->second
633 << " is a device, not bucket" << dendl;
634 return false;
635 }
636
637 assert(bucket_exists(id));
638 crush_bucket *b = get_bucket(id);
639
640 // see if item exists in this bucket
641 for (unsigned j=0; j<b->size; j++) {
642 if (b->items[j] == item) {
643 ldout(cct, 2) << "check_item_loc " << item << " exists in bucket " << b->id << dendl;
644 if (weight)
645 *weight = crush_get_bucket_item_weight(b, j);
646 return true;
647 }
648 }
649 return false;
650 }
651
652 ldout(cct, 1) << "check_item_loc item " << item << " loc " << loc << dendl;
653 return false;
654}
655
656map<string, string> CrushWrapper::get_full_location(int id)
657{
658 vector<pair<string, string> > full_location_ordered;
659 map<string,string> full_location;
660
661 get_full_location_ordered(id, full_location_ordered);
662
663 std::copy(full_location_ordered.begin(),
664 full_location_ordered.end(),
665 std::inserter(full_location, full_location.begin()));
666
667 return full_location;
668}
669
670int CrushWrapper::get_full_location_ordered(int id, vector<pair<string, string> >& path)
671{
672 if (!item_exists(id))
673 return -ENOENT;
674 int cur = id;
675 int ret;
676 while (true) {
677 pair<string, string> parent_coord = get_immediate_parent(cur, &ret);
678 if (ret != 0)
679 break;
680 path.push_back(parent_coord);
681 cur = get_item_id(parent_coord.second);
682 }
683 return 0;
684}
685
31f18b77
FG
686string CrushWrapper::get_full_location_ordered_string(int id)
687{
688 vector<pair<string, string> > full_location_ordered;
689 string full_location;
690 get_full_location_ordered(id, full_location_ordered);
691 reverse(begin(full_location_ordered), end(full_location_ordered));
692 for(auto i = full_location_ordered.begin(); i != full_location_ordered.end(); i++) {
693 full_location = full_location + i->first + "=" + i->second;
694 if (i != full_location_ordered.end() - 1) {
695 full_location = full_location + ",";
696 }
697 }
698 return full_location;
699}
7c673cae
FG
700
701map<int, string> CrushWrapper::get_parent_hierarchy(int id)
702{
703 map<int,string> parent_hierarchy;
704 pair<string, string> parent_coord = get_immediate_parent(id);
705 int parent_id;
706
707 // get the integer type for id and create a counter from there
708 int type_counter = get_bucket_type(id);
709
710 // if we get a negative type then we can assume that we have an OSD
711 // change behavior in get_item_type FIXME
712 if (type_counter < 0)
713 type_counter = 0;
714
715 // read the type map and get the name of the type with the largest ID
716 int high_type = 0;
717 for (map<int, string>::iterator it = type_map.begin(); it != type_map.end(); ++it){
718 if ( (*it).first > high_type )
719 high_type = (*it).first;
720 }
721
722 parent_id = get_item_id(parent_coord.second);
723
724 while (type_counter < high_type) {
725 type_counter++;
726 parent_hierarchy[ type_counter ] = parent_coord.first;
727
728 if (type_counter < high_type){
729 // get the coordinate information for the next parent
730 parent_coord = get_immediate_parent(parent_id);
731 parent_id = get_item_id(parent_coord.second);
732 }
733 }
734
735 return parent_hierarchy;
736}
737
738int CrushWrapper::get_children(int id, list<int> *children)
739{
740 // leaf?
741 if (id >= 0) {
742 return 0;
743 }
744
745 crush_bucket *b = get_bucket(id);
746 if (IS_ERR(b)) {
747 return -ENOENT;
748 }
749
750 for (unsigned n=0; n<b->size; n++) {
751 children->push_back(b->items[n]);
752 }
753 return b->size;
754}
755
31f18b77
FG
756int CrushWrapper::_get_leaves(int id, list<int> *leaves)
757{
758 assert(leaves);
7c673cae 759
31f18b77
FG
760 // Already leaf?
761 if (id >= 0) {
762 leaves->push_back(id);
763 return 0;
764 }
765
766 crush_bucket *b = get_bucket(id);
767 if (IS_ERR(b)) {
768 return -ENOENT;
769 }
770
771 for (unsigned n = 0; n < b->size; n++) {
772 if (b->items[n] >= 0) {
773 leaves->push_back(b->items[n]);
774 } else {
775 // is a bucket, do recursive call
776 int r = _get_leaves(b->items[n], leaves);
777 if (r < 0) {
778 return r;
779 }
780 }
781 }
782
783 return 0; // all is well
784}
785
786int CrushWrapper::get_leaves(const string &name, set<int> *leaves)
7c673cae 787{
31f18b77
FG
788 assert(leaves);
789 leaves->clear();
790
791 if (!name_exists(name)) {
792 return -ENOENT;
793 }
794
795 int id = get_item_id(name);
796 if (id >= 0) {
797 // already leaf
798 leaves->insert(id);
799 return 0;
7c673cae
FG
800 }
801
31f18b77
FG
802 list<int> unordered;
803 int r = _get_leaves(id, &unordered);
804 if (r < 0) {
805 return r;
806 }
807
808 for (auto &p : unordered) {
809 leaves->insert(p);
810 }
811
812 return 0;
813}
814
815int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string name,
816 const map<string,string>& loc) // typename -> bucketname
817{
7c673cae
FG
818 ldout(cct, 5) << "insert_item item " << item << " weight " << weight
819 << " name " << name << " loc " << loc << dendl;
820
821 if (!is_valid_crush_name(name))
822 return -EINVAL;
823
824 if (!is_valid_crush_loc(cct, loc))
825 return -EINVAL;
826
224ce89b
WB
827 int r = validate_weightf(weight);
828 if (r < 0) {
829 return r;
830 }
831
7c673cae
FG
832 if (name_exists(name)) {
833 if (get_item_id(name) != item) {
834 ldout(cct, 10) << "device name '" << name << "' already exists as id "
835 << get_item_id(name) << dendl;
836 return -EEXIST;
837 }
838 } else {
839 set_item_name(item, name);
840 }
841
842 int cur = item;
843
844 // create locations if locations don't exist and add child in location with 0 weight
845 // the more detail in the insert_item method declaration in CrushWrapper.h
846 for (map<int,string>::iterator p = type_map.begin(); p != type_map.end(); ++p) {
847 // ignore device type
848 if (p->first == 0)
849 continue;
850
851 // skip types that are unspecified
852 map<string,string>::const_iterator q = loc.find(p->second);
853 if (q == loc.end()) {
854 ldout(cct, 2) << "warning: did not specify location for '" << p->second << "' level (levels are "
855 << type_map << ")" << dendl;
856 continue;
857 }
858
859 if (!name_exists(q->second)) {
860 ldout(cct, 5) << "insert_item creating bucket " << q->second << dendl;
861 int empty = 0, newid;
862 int r = add_bucket(0, 0,
863 CRUSH_HASH_DEFAULT, p->first, 1, &cur, &empty, &newid);
864 if (r < 0) {
865 ldout(cct, 1) << "add_bucket failure error: " << cpp_strerror(r) << dendl;
866 return r;
867 }
868 set_item_name(newid, q->second);
869
870 cur = newid;
871 continue;
872 }
873
874 // add to an existing bucket
875 int id = get_item_id(q->second);
876 if (!bucket_exists(id)) {
877 ldout(cct, 1) << "insert_item doesn't have bucket " << id << dendl;
878 return -EINVAL;
879 }
880
881 // check that we aren't creating a cycle.
882 if (subtree_contains(id, cur)) {
883 ldout(cct, 1) << "insert_item item " << cur << " already exists beneath " << id << dendl;
884 return -EINVAL;
885 }
886
887 // we have done sanity check above
888 crush_bucket *b = get_bucket(id);
889
890 if (p->first != b->type) {
891 ldout(cct, 1) << "insert_item existing bucket has type "
892 << "'" << type_map[b->type] << "' != "
893 << "'" << type_map[p->first] << "'" << dendl;
894 return -EINVAL;
895 }
896
897 // are we forming a loop?
898 if (subtree_contains(cur, b->id)) {
899 ldout(cct, 1) << "insert_item " << cur << " already contains " << b->id
900 << "; cannot form loop" << dendl;
901 return -ELOOP;
902 }
903
904 ldout(cct, 5) << "insert_item adding " << cur << " weight " << weight
905 << " to bucket " << id << dendl;
31f18b77 906 int r = bucket_add_item(b, cur, 0);
7c673cae
FG
907 assert (!r);
908 break;
909 }
910
911 // adjust the item's weight in location
912 if(adjust_item_weightf_in_loc(cct, item, weight, loc) > 0) {
913 if (item >= crush->max_devices) {
914 crush->max_devices = item + 1;
915 ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices << dendl;
916 }
917 return 0;
918 }
919
920 ldout(cct, 1) << "error: didn't find anywhere to add item " << item << " in " << loc << dendl;
921 return -EINVAL;
922}
923
924int CrushWrapper::move_bucket(CephContext *cct, int id, const map<string,string>& loc)
925{
7c673cae
FG
926 // sorry this only works for buckets
927 if (id >= 0)
928 return -EINVAL;
929
930 if (!item_exists(id))
931 return -ENOENT;
932
933 // get the name of the bucket we are trying to move for later
934 string id_name = get_item_name(id);
935
936 // detach the bucket
937 int bucket_weight = detach_bucket(cct, id);
938
939 // insert the bucket back into the hierarchy
940 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
941}
942
31f18b77 943int CrushWrapper::swap_bucket(CephContext *cct, int src, int dst)
7c673cae 944{
31f18b77
FG
945 if (src >= 0 || dst >= 0)
946 return -EINVAL;
947 if (!item_exists(src) || !item_exists(dst))
948 return -EINVAL;
949 crush_bucket *a = get_bucket(src);
950 crush_bucket *b = get_bucket(dst);
951 unsigned aw = a->weight;
952 unsigned bw = b->weight;
953
954 // swap weights
955 adjust_item_weight(cct, a->id, bw);
956 adjust_item_weight(cct, b->id, aw);
957
958 // swap items
959 map<int,unsigned> tmp;
960 unsigned as = a->size;
961 unsigned bs = b->size;
962 for (unsigned i = 0; i < as; ++i) {
963 int item = a->items[0];
964 int itemw = crush_get_bucket_item_weight(a, 0);
965 tmp[item] = itemw;
966 bucket_remove_item(a, item);
967 }
968 assert(a->size == 0);
969 assert(b->size == bs);
970 for (unsigned i = 0; i < bs; ++i) {
971 int item = b->items[0];
972 int itemw = crush_get_bucket_item_weight(b, 0);
973 bucket_remove_item(b, item);
974 bucket_add_item(a, item, itemw);
975 }
976 assert(a->size == bs);
977 assert(b->size == 0);
978 for (auto t : tmp) {
979 bucket_add_item(b, t.first, t.second);
980 }
981 assert(a->size == bs);
982 assert(b->size == as);
983
984 // swap names
985 swap_names(src, dst);
986 return 0;
987}
7c673cae 988
31f18b77
FG
989int CrushWrapper::link_bucket(CephContext *cct, int id, const map<string,string>& loc)
990{
7c673cae
FG
991 // sorry this only works for buckets
992 if (id >= 0)
993 return -EINVAL;
994
995 if (!item_exists(id))
996 return -ENOENT;
997
998 // get the name of the bucket we are trying to move for later
999 string id_name = get_item_name(id);
1000
1001 crush_bucket *b = get_bucket(id);
1002 unsigned bucket_weight = b->weight;
1003
1004 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
1005}
1006
1007int CrushWrapper::create_or_move_item(CephContext *cct, int item, float weight, string name,
1008 const map<string,string>& loc) // typename -> bucketname
1009{
7c673cae
FG
1010 int ret = 0;
1011 int old_iweight;
1012
1013 if (!is_valid_crush_name(name))
1014 return -EINVAL;
1015
1016 if (check_item_loc(cct, item, loc, &old_iweight)) {
1017 ldout(cct, 5) << "create_or_move_item " << item << " already at " << loc << dendl;
1018 } else {
1019 if (_search_item_exists(item)) {
1020 weight = get_item_weightf(item);
1021 ldout(cct, 10) << "create_or_move_item " << item << " exists with weight " << weight << dendl;
1022 remove_item(cct, item, true);
1023 }
1024 ldout(cct, 5) << "create_or_move_item adding " << item << " weight " << weight
1025 << " at " << loc << dendl;
1026 ret = insert_item(cct, item, weight, name, loc);
1027 if (ret == 0)
1028 ret = 1; // changed
1029 }
1030 return ret;
1031}
1032
1033int CrushWrapper::update_item(CephContext *cct, int item, float weight, string name,
1034 const map<string,string>& loc) // typename -> bucketname
1035{
7c673cae
FG
1036 ldout(cct, 5) << "update_item item " << item << " weight " << weight
1037 << " name " << name << " loc " << loc << dendl;
1038 int ret = 0;
1039
1040 if (!is_valid_crush_name(name))
1041 return -EINVAL;
1042
1043 if (!is_valid_crush_loc(cct, loc))
1044 return -EINVAL;
1045
224ce89b
WB
1046 ret = validate_weightf(weight);
1047 if (ret < 0) {
1048 return ret;
1049 }
1050
7c673cae
FG
1051 // compare quantized (fixed-point integer) weights!
1052 int iweight = (int)(weight * (float)0x10000);
1053 int old_iweight;
1054 if (check_item_loc(cct, item, loc, &old_iweight)) {
1055 ldout(cct, 5) << "update_item " << item << " already at " << loc << dendl;
1056 if (old_iweight != iweight) {
1057 ldout(cct, 5) << "update_item " << item << " adjusting weight "
1058 << ((float)old_iweight/(float)0x10000) << " -> " << weight << dendl;
1059 adjust_item_weight_in_loc(cct, item, iweight, loc);
1060 ret = 1;
1061 }
1062 if (get_item_name(item) != name) {
1063 ldout(cct, 5) << "update_item setting " << item << " name to " << name << dendl;
1064 set_item_name(item, name);
1065 ret = 1;
1066 }
1067 } else {
1068 if (item_exists(item)) {
1069 remove_item(cct, item, true);
1070 }
1071 ldout(cct, 5) << "update_item adding " << item << " weight " << weight
1072 << " at " << loc << dendl;
1073 ret = insert_item(cct, item, weight, name, loc);
1074 if (ret == 0)
1075 ret = 1; // changed
1076 }
1077 return ret;
1078}
1079
1080int CrushWrapper::get_item_weight(int id) const
1081{
1082 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1083 crush_bucket *b = crush->buckets[bidx];
1084 if (b == NULL)
1085 continue;
1086 if (b->id == id)
1087 return b->weight;
1088 for (unsigned i = 0; i < b->size; i++)
1089 if (b->items[i] == id)
1090 return crush_get_bucket_item_weight(b, i);
1091 }
1092 return -ENOENT;
1093}
1094
1095int CrushWrapper::get_item_weight_in_loc(int id, const map<string,string> &loc)
1096{
1097 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
1098
1099 int bid = get_item_id(l->second);
1100 if (!bucket_exists(bid))
1101 continue;
1102 crush_bucket *b = get_bucket(bid);
1103 for (unsigned int i = 0; i < b->size; i++) {
1104 if (b->items[i] == id) {
1105 return crush_get_bucket_item_weight(b, i);
1106 }
1107 }
1108 }
1109 return -ENOENT;
1110}
1111
1112int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight)
1113{
1114 ldout(cct, 5) << "adjust_item_weight " << id << " weight " << weight << dendl;
1115 int changed = 0;
1116 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1117 crush_bucket *b = crush->buckets[bidx];
1118 if (b == 0)
1119 continue;
1120 for (unsigned i = 0; i < b->size; i++) {
1121 if (b->items[i] == id) {
31f18b77 1122 int diff = bucket_adjust_item_weight(cct, b, id, weight);
7c673cae
FG
1123 ldout(cct, 5) << "adjust_item_weight " << id << " diff " << diff << " in bucket " << bidx << dendl;
1124 adjust_item_weight(cct, -1 - bidx, b->weight);
1125 changed++;
1126 }
1127 }
1128 }
1129 if (!changed)
1130 return -ENOENT;
1131 return changed;
1132}
1133
1134int CrushWrapper::adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc)
1135{
1136 ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight << " in " << loc << dendl;
1137 int changed = 0;
1138
1139 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
1140 int bid = get_item_id(l->second);
1141 if (!bucket_exists(bid))
1142 continue;
1143 crush_bucket *b = get_bucket(bid);
1144 for (unsigned int i = 0; i < b->size; i++) {
1145 if (b->items[i] == id) {
31f18b77 1146 int diff = bucket_adjust_item_weight(cct, b, id, weight);
7c673cae
FG
1147 ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " diff " << diff << " in bucket " << bid << dendl;
1148 adjust_item_weight(cct, bid, b->weight);
1149 changed++;
1150 }
1151 }
1152 }
1153 if (!changed)
1154 return -ENOENT;
1155 return changed;
1156}
1157
1158int CrushWrapper::adjust_subtree_weight(CephContext *cct, int id, int weight)
1159{
1160 ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl;
1161 crush_bucket *b = get_bucket(id);
1162 if (IS_ERR(b))
1163 return PTR_ERR(b);
1164 int changed = 0;
1165 list<crush_bucket*> q;
1166 q.push_back(b);
1167 while (!q.empty()) {
1168 b = q.front();
1169 q.pop_front();
1170 int local_changed = 0;
1171 for (unsigned i=0; i<b->size; ++i) {
1172 int n = b->items[i];
1173 if (n >= 0) {
31f18b77 1174 bucket_adjust_item_weight(cct, b, n, weight);
7c673cae
FG
1175 ++changed;
1176 ++local_changed;
1177 } else {
1178 crush_bucket *sub = get_bucket(n);
1179 if (IS_ERR(sub))
1180 continue;
1181 q.push_back(sub);
1182 }
1183 }
1184 if (local_changed) {
1185 adjust_item_weight(cct, b->id, b->weight);
1186 }
1187 }
1188 return changed;
1189}
1190
1191bool CrushWrapper::check_item_present(int id) const
1192{
1193 bool found = false;
1194
1195 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1196 crush_bucket *b = crush->buckets[bidx];
1197 if (b == 0)
1198 continue;
1199 for (unsigned i = 0; i < b->size; i++)
1200 if (b->items[i] == id)
1201 found = true;
1202 }
1203 return found;
1204}
1205
1206
1207pair<string,string> CrushWrapper::get_immediate_parent(int id, int *_ret)
1208{
1209
1210 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1211 crush_bucket *b = crush->buckets[bidx];
1212 if (b == 0)
1213 continue;
224ce89b
WB
1214 const char *n = get_item_name(b->id);
1215 if (n && !is_valid_crush_name(n))
1216 continue;
7c673cae
FG
1217 for (unsigned i = 0; i < b->size; i++)
1218 if (b->items[i] == id) {
1219 string parent_id = name_map[b->id];
1220 string parent_bucket_type = type_map[b->type];
1221 if (_ret)
1222 *_ret = 0;
1223 return make_pair(parent_bucket_type, parent_id);
1224 }
1225 }
1226
1227 if (_ret)
1228 *_ret = -ENOENT;
1229
1230 return pair<string, string>();
1231}
1232
1233int CrushWrapper::get_immediate_parent_id(int id, int *parent) const
1234{
1235 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1236 crush_bucket *b = crush->buckets[bidx];
1237 if (b == 0)
1238 continue;
224ce89b
WB
1239 const char *n = get_item_name(b->id);
1240 if (n && !is_valid_crush_name(n))
1241 continue;
7c673cae
FG
1242 for (unsigned i = 0; i < b->size; i++) {
1243 if (b->items[i] == id) {
1244 *parent = b->id;
1245 return 0;
1246 }
1247 }
1248 }
1249 return -ENOENT;
1250}
1251
31f18b77
FG
1252int CrushWrapper::get_parent_of_type(int item, int type) const
1253{
1254 do {
1255 int r = get_immediate_parent_id(item, &item);
1256 if (r < 0) {
1257 return 0;
1258 }
1259 } while (get_bucket_type(item) != type);
1260 return item;
1261}
1262
7c673cae
FG
1263bool CrushWrapper::class_is_in_use(int class_id)
1264{
1265 for (auto &i : class_bucket)
1266 for (auto &j : i.second)
1267 if (j.first == class_id)
1268 return true;
1269
1270 for (auto &i : class_map)
1271 if (i.second == class_id)
1272 return true;
1273
1274 return false;
1275}
1276
1277int CrushWrapper::populate_classes()
1278{
1279 set<int> roots;
1280 find_roots(roots);
1281 for (auto &r : roots) {
1282 if (r >= 0)
1283 continue;
1284 if (id_has_class(r))
1285 continue;
1286 for (auto &c : class_name) {
1287 int clone;
1288 int res = device_class_clone(r, c.first, &clone);
1289 if (res < 0)
1290 return res;
1291 }
1292 }
1293 return 0;
1294}
1295
1296int CrushWrapper::cleanup_classes()
1297{
1298 return trim_roots_with_class(true);
1299}
1300
1301int CrushWrapper::trim_roots_with_class(bool unused)
1302{
1303 set<int> roots;
1304 find_roots(roots);
1305 for (auto &r : roots) {
1306 if (r >= 0)
1307 continue;
1308 if (!id_has_class(r))
1309 continue;
1310 int res = remove_root(r, unused);
1311 if (res)
1312 return res;
1313 }
1314 // there is no need to reweight because we only remove from the
1315 // root and down
1316 return 0;
1317}
1318
224ce89b
WB
1319int32_t CrushWrapper::_alloc_class_id() const {
1320 if (class_name.empty()) {
1321 return 0;
1322 }
1323 int32_t class_id = class_name.rbegin()->first + 1;
1324 if (class_id >= 0) {
1325 return class_id;
1326 }
1327 // wrapped, pick a random start and do exhaustive search
1328 uint32_t upperlimit = numeric_limits<int32_t>::max();
1329 upperlimit++;
1330 class_id = rand() % upperlimit;
1331 const auto start = class_id;
1332 do {
1333 if (!class_name.count(class_id)) {
1334 return class_id;
1335 } else {
1336 class_id++;
1337 if (class_id < 0) {
1338 class_id = 0;
1339 }
1340 }
1341 } while (class_id != start);
1342 assert(0 == "no available class id");
1343}
1344
7c673cae
FG
1345void CrushWrapper::reweight(CephContext *cct)
1346{
1347 set<int> roots;
1348 find_roots(roots);
1349 for (set<int>::iterator p = roots.begin(); p != roots.end(); ++p) {
1350 if (*p >= 0)
1351 continue;
1352 crush_bucket *b = get_bucket(*p);
1353 ldout(cct, 5) << "reweight bucket " << *p << dendl;
1354 int r = crush_reweight_bucket(crush, b);
1355 assert(r == 0);
1356 }
1357}
1358
31f18b77
FG
1359int CrushWrapper::add_simple_rule_at(
1360 string name, string root_name,
1361 string failure_domain_name,
224ce89b 1362 string device_class,
31f18b77 1363 string mode, int rule_type,
224ce89b
WB
1364 int rno,
1365 ostream *err)
7c673cae
FG
1366{
1367 if (rule_exists(name)) {
1368 if (err)
1369 *err << "rule " << name << " exists";
1370 return -EEXIST;
1371 }
1372 if (rno >= 0) {
1373 if (rule_exists(rno)) {
1374 if (err)
1375 *err << "rule with ruleno " << rno << " exists";
1376 return -EEXIST;
1377 }
1378 if (ruleset_exists(rno)) {
1379 if (err)
1380 *err << "ruleset " << rno << " exists";
1381 return -EEXIST;
1382 }
1383 } else {
1384 for (rno = 0; rno < get_max_rules(); rno++) {
1385 if (!rule_exists(rno) && !ruleset_exists(rno))
1386 break;
1387 }
1388 }
1389 if (!name_exists(root_name)) {
1390 if (err)
1391 *err << "root item " << root_name << " does not exist";
1392 return -ENOENT;
1393 }
1394 int root = get_item_id(root_name);
1395 int type = 0;
1396 if (failure_domain_name.length()) {
1397 type = get_type_id(failure_domain_name);
1398 if (type < 0) {
1399 if (err)
1400 *err << "unknown type " << failure_domain_name;
1401 return -EINVAL;
1402 }
1403 }
224ce89b
WB
1404 if (device_class.size()) {
1405 if (!class_exists(device_class)) {
1406 if (err)
1407 *err << "device class " << device_class << " does not exist";
1408 return -EINVAL;
1409 }
1410 int c = get_class_id(device_class);
1411 if (class_bucket.count(root) == 0 ||
1412 class_bucket[root].count(c) == 0) {
1413 if (err)
1414 *err << "root " << root_name << " has no devices with class "
1415 << device_class;
1416 return -EINVAL;
1417 }
1418 root = class_bucket[root][c];
1419 }
7c673cae
FG
1420 if (mode != "firstn" && mode != "indep") {
1421 if (err)
1422 *err << "unknown mode " << mode;
1423 return -EINVAL;
1424 }
1425
1426 int steps = 3;
1427 if (mode == "indep")
1428 steps = 5;
1429 int min_rep = mode == "firstn" ? 1 : 3;
1430 int max_rep = mode == "firstn" ? 10 : 20;
1431 //set the ruleset the same as rule_id(rno)
1432 crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_rep, max_rep);
1433 assert(rule);
1434 int step = 0;
1435 if (mode == "indep") {
1436 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
1437 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
1438 }
1439 crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
1440 if (type)
1441 crush_rule_set_step(rule, step++,
1442 mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
1443 CRUSH_RULE_CHOOSELEAF_INDEP,
1444 CRUSH_CHOOSE_N,
1445 type);
1446 else
1447 crush_rule_set_step(rule, step++,
1448 mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
1449 CRUSH_RULE_CHOOSE_INDEP,
1450 CRUSH_CHOOSE_N,
1451 0);
1452 crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
1453
1454 int ret = crush_add_rule(crush, rule, rno);
1455 if(ret < 0) {
1456 *err << "failed to add rule " << rno << " because " << cpp_strerror(ret);
1457 return ret;
1458 }
1459 set_rule_name(rno, name);
1460 have_rmaps = false;
1461 return rno;
1462}
1463
31f18b77
FG
1464int CrushWrapper::add_simple_rule(
1465 string name, string root_name,
1466 string failure_domain_name,
224ce89b 1467 string device_class,
31f18b77
FG
1468 string mode, int rule_type,
1469 ostream *err)
7c673cae 1470{
224ce89b
WB
1471 return add_simple_rule_at(name, root_name, failure_domain_name, device_class,
1472 mode,
31f18b77 1473 rule_type, -1, err);
7c673cae
FG
1474}
1475
1476int CrushWrapper::get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap)
1477{
1478 if (ruleno >= crush->max_rules)
1479 return -ENOENT;
1480 if (crush->rules[ruleno] == NULL)
1481 return -ENOENT;
1482 crush_rule *rule = crush->rules[ruleno];
1483
1484 // build a weight map for each TAKE in the rule, and then merge them
31f18b77
FG
1485
1486 // FIXME: if there are multiple takes that place a different number of
1487 // objects we do not take that into account. (Also, note that doing this
1488 // right is also a function of the pool, since the crush rule
1489 // might choose 2 + choose 2 but pool size may only be 3.)
7c673cae
FG
1490 for (unsigned i=0; i<rule->len; ++i) {
1491 map<int,float> m;
1492 float sum = 0;
1493 if (rule->steps[i].op == CRUSH_RULE_TAKE) {
1494 int n = rule->steps[i].arg1;
1495 if (n >= 0) {
1496 m[n] = 1.0;
1497 sum = 1.0;
1498 } else {
1499 list<int> q;
1500 q.push_back(n);
1501 //breadth first iterate the OSD tree
1502 while (!q.empty()) {
1503 int bno = q.front();
1504 q.pop_front();
1505 crush_bucket *b = crush->buckets[-1-bno];
1506 assert(b);
1507 for (unsigned j=0; j<b->size; ++j) {
1508 int item_id = b->items[j];
1509 if (item_id >= 0) { //it's an OSD
1510 float w = crush_get_bucket_item_weight(b, j);
1511 m[item_id] = w;
1512 sum += w;
1513 } else { //not an OSD, expand the child later
1514 q.push_back(item_id);
1515 }
1516 }
1517 }
1518 }
1519 }
1520 for (map<int,float>::iterator p = m.begin(); p != m.end(); ++p) {
1521 map<int,float>::iterator q = pmap->find(p->first);
1522 if (q == pmap->end()) {
1523 (*pmap)[p->first] = p->second / sum;
1524 } else {
1525 q->second += p->second / sum;
1526 }
1527 }
1528 }
1529
1530 return 0;
1531}
1532
1533int CrushWrapper::remove_rule(int ruleno)
1534{
1535 if (ruleno >= (int)crush->max_rules)
1536 return -ENOENT;
1537 if (crush->rules[ruleno] == NULL)
1538 return -ENOENT;
1539 crush_destroy_rule(crush->rules[ruleno]);
1540 crush->rules[ruleno] = NULL;
1541 rule_name_map.erase(ruleno);
1542 have_rmaps = false;
1543 return 0;
1544}
1545
31f18b77
FG
1546int CrushWrapper::bucket_adjust_item_weight(CephContext *cct, crush_bucket *bucket, int item, int weight)
1547{
1548 if (cct->_conf->osd_crush_update_weight_set) {
1549 unsigned position;
1550 for (position = 0; position < bucket->size; position++)
1551 if (bucket->items[position] == item)
1552 break;
1553 assert(position != bucket->size);
1554 for (auto w : choose_args) {
1555 crush_choose_arg_map arg_map = w.second;
1556 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
1557 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1558 crush_weight_set *weight_set = &arg->weight_set[j];
1559 weight_set->weights[position] = weight;
1560 }
1561 }
1562 }
1563 return crush_bucket_adjust_item_weight(crush, bucket, item, weight);
1564}
1565
1566int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight)
1567{
1568 __u32 new_size = bucket->size + 1;
1569 for (auto w : choose_args) {
1570 crush_choose_arg_map arg_map = w.second;
1571 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
1572 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1573 crush_weight_set *weight_set = &arg->weight_set[j];
1574 weight_set->weights = (__u32*)realloc(weight_set->weights, new_size * sizeof(__u32));
1575 assert(weight_set->size + 1 == new_size);
1576 weight_set->weights[weight_set->size] = weight;
1577 weight_set->size = new_size;
1578 }
1579 if (arg->ids_size) {
224ce89b 1580 arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
31f18b77
FG
1581 assert(arg->ids_size + 1 == new_size);
1582 arg->ids[arg->ids_size] = item;
1583 arg->ids_size = new_size;
1584 }
1585 }
1586 return crush_bucket_add_item(crush, bucket, item, weight);
1587}
1588
1589int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
1590{
1591 __u32 new_size = bucket->size - 1;
1592 unsigned position;
1593 for (position = 0; position < bucket->size; position++)
1594 if (bucket->items[position] == item)
1595 break;
1596 assert(position != bucket->size);
1597 for (auto w : choose_args) {
1598 crush_choose_arg_map arg_map = w.second;
1599 crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
1600 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1601 crush_weight_set *weight_set = &arg->weight_set[j];
1602 assert(weight_set->size - 1 == new_size);
1603 for (__u32 k = position; k < new_size; k++)
1604 weight_set->weights[k] = weight_set->weights[k+1];
1605 weight_set->weights = (__u32*)realloc(weight_set->weights, new_size * sizeof(__u32));
1606 weight_set->size = new_size;
1607 }
1608 if (arg->ids_size) {
1609 assert(arg->ids_size - 1 == new_size);
1610 for (__u32 k = position; k < new_size; k++)
1611 arg->ids[k] = arg->ids[k+1];
224ce89b 1612 arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
31f18b77
FG
1613 arg->ids_size = new_size;
1614 }
1615 }
1616 return crush_bucket_remove_item(crush, bucket, item);
1617}
1618
224ce89b
WB
1619int CrushWrapper::update_device_class(int id,
1620 const string& class_name,
1621 const string& name,
1622 ostream *ss)
7c673cae 1623{
224ce89b 1624 int class_id = get_or_create_class_id(class_name);
7c673cae 1625 if (id < 0) {
224ce89b 1626 *ss << name << " id " << id << " is negative";
7c673cae
FG
1627 return -EINVAL;
1628 }
1629 assert(item_exists(id));
1630
1631 if (class_map.count(id) != 0 && class_map[id] == class_id) {
224ce89b 1632 *ss << name << " already set to class " << class_name;
7c673cae
FG
1633 return 0;
1634 }
1635
1636 set_item_class(id, class_id);
1637
1638 int r = rebuild_roots_with_classes();
1639 if (r < 0)
1640 return r;
1641 return 1;
1642}
1643
1644int CrushWrapper::device_class_clone(int original_id, int device_class, int *clone)
1645{
1646 const char *item_name = get_item_name(original_id);
1647 if (item_name == NULL)
1648 return -ECHILD;
1649 const char *class_name = get_class_name(device_class);
1650 if (class_name == NULL)
1651 return -EBADF;
1652 string copy_name = item_name + string("~") + class_name;
1653 if (name_exists(copy_name)) {
1654 *clone = get_item_id(copy_name);
1655 return 0;
1656 }
1657 crush_bucket *original = get_bucket(original_id);
224ce89b 1658 if (IS_ERR(original))
7c673cae
FG
1659 return -ENOENT;
1660 crush_bucket *copy = crush_make_bucket(crush,
1661 original->alg,
1662 original->hash,
1663 original->type,
1664 0, NULL, NULL);
1665 if(copy == NULL)
1666 return -ENOMEM;
1667 for (unsigned i = 0; i < original->size; i++) {
1668 int item = original->items[i];
1669 int weight = crush_get_bucket_item_weight(original, i);
1670 if (item >= 0) {
1671 if (class_map.count(item) != 0 && class_map[item] == device_class) {
31f18b77 1672 int res = bucket_add_item(copy, item, weight);
7c673cae
FG
1673 if (res)
1674 return res;
1675 }
1676 } else {
1677 int child_copy_id;
1678 int res = device_class_clone(item, device_class, &child_copy_id);
1679 if (res < 0)
1680 return res;
1681 crush_bucket *child_copy = get_bucket(child_copy_id);
1682 if (IS_ERR(child_copy))
1683 return -ENOENT;
31f18b77 1684 res = bucket_add_item(copy, child_copy_id, child_copy->weight);
7c673cae
FG
1685 if (res)
1686 return res;
1687 }
1688 }
1689 int res = crush_add_bucket(crush, 0, copy, clone);
1690 if (res)
1691 return res;
1692 res = set_item_class(*clone, device_class);
1693 if (res < 0)
1694 return res;
1695 // we do not use set_item_name because the name is intentionally invalid
1696 name_map[*clone] = copy_name;
1697 if (have_rmaps)
1698 name_rmap[copy_name] = *clone;
1699 class_bucket[original_id][device_class] = *clone;
1700 return 0;
1701}
1702
1703int CrushWrapper::rebuild_roots_with_classes()
1704{
1705 int r = trim_roots_with_class(false);
1706 if (r < 0)
1707 return r;
1708 r = populate_classes();
1709 if (r < 0)
1710 return r;
1711 return trim_roots_with_class(true);
1712}
1713
1714void CrushWrapper::encode(bufferlist& bl, uint64_t features) const
1715{
1716 assert(crush);
1717
1718 __u32 magic = CRUSH_MAGIC;
1719 ::encode(magic, bl);
1720
1721 ::encode(crush->max_buckets, bl);
1722 ::encode(crush->max_rules, bl);
1723 ::encode(crush->max_devices, bl);
1724
31f18b77
FG
1725 bool encode_compat_choose_args = false;
1726 crush_choose_arg_map arg_map;
1727 memset(&arg_map, '\0', sizeof(arg_map));
1728 if (has_choose_args() &&
1729 !HAVE_FEATURE(features, CRUSH_CHOOSE_ARGS)) {
1730 assert(!has_incompat_choose_args());
1731 encode_compat_choose_args = true;
1732 arg_map = choose_args.begin()->second;
1733 }
1734
7c673cae
FG
1735 // buckets
1736 for (int i=0; i<crush->max_buckets; i++) {
1737 __u32 alg = 0;
1738 if (crush->buckets[i]) alg = crush->buckets[i]->alg;
1739 ::encode(alg, bl);
1740 if (!alg)
1741 continue;
1742
1743 ::encode(crush->buckets[i]->id, bl);
1744 ::encode(crush->buckets[i]->type, bl);
1745 ::encode(crush->buckets[i]->alg, bl);
1746 ::encode(crush->buckets[i]->hash, bl);
1747 ::encode(crush->buckets[i]->weight, bl);
1748 ::encode(crush->buckets[i]->size, bl);
1749 for (unsigned j=0; j<crush->buckets[i]->size; j++)
1750 ::encode(crush->buckets[i]->items[j], bl);
1751
1752 switch (crush->buckets[i]->alg) {
1753 case CRUSH_BUCKET_UNIFORM:
1754 ::encode((reinterpret_cast<crush_bucket_uniform*>(crush->buckets[i]))->item_weight, bl);
1755 break;
1756
1757 case CRUSH_BUCKET_LIST:
1758 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
1759 ::encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->item_weights[j], bl);
1760 ::encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->sum_weights[j], bl);
1761 }
1762 break;
1763
1764 case CRUSH_BUCKET_TREE:
1765 ::encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes, bl);
1766 for (unsigned j=0; j<(reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes; j++)
1767 ::encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->node_weights[j], bl);
1768 break;
1769
1770 case CRUSH_BUCKET_STRAW:
1771 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
1772 ::encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->item_weights[j], bl);
1773 ::encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->straws[j], bl);
1774 }
1775 break;
1776
1777 case CRUSH_BUCKET_STRAW2:
31f18b77
FG
1778 {
1779 __u32 *weights;
1780 if (encode_compat_choose_args &&
1781 arg_map.args[i].weight_set_size > 0) {
1782 weights = arg_map.args[i].weight_set[0].weights;
1783 } else {
1784 weights = (reinterpret_cast<crush_bucket_straw2*>(crush->buckets[i]))->item_weights;
1785 }
1786 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
1787 ::encode(weights[j], bl);
1788 }
7c673cae
FG
1789 }
1790 break;
1791
1792 default:
1793 ceph_abort();
1794 break;
1795 }
1796 }
1797
1798 // rules
1799 for (unsigned i=0; i<crush->max_rules; i++) {
1800 __u32 yes = crush->rules[i] ? 1:0;
1801 ::encode(yes, bl);
1802 if (!yes)
1803 continue;
1804
1805 ::encode(crush->rules[i]->len, bl);
1806 ::encode(crush->rules[i]->mask, bl);
1807 for (unsigned j=0; j<crush->rules[i]->len; j++)
1808 ::encode(crush->rules[i]->steps[j], bl);
1809 }
1810
1811 // name info
1812 ::encode(type_map, bl);
1813 ::encode(name_map, bl);
1814 ::encode(rule_name_map, bl);
1815
1816 // tunables
1817 ::encode(crush->choose_local_tries, bl);
1818 ::encode(crush->choose_local_fallback_tries, bl);
1819 ::encode(crush->choose_total_tries, bl);
1820 ::encode(crush->chooseleaf_descend_once, bl);
1821 ::encode(crush->chooseleaf_vary_r, bl);
1822 ::encode(crush->straw_calc_version, bl);
1823 ::encode(crush->allowed_bucket_algs, bl);
1824 if (features & CEPH_FEATURE_CRUSH_TUNABLES5) {
1825 ::encode(crush->chooseleaf_stable, bl);
1826 }
1827
1828 if (HAVE_FEATURE(features, SERVER_LUMINOUS)) {
1829 // device classes
1830 ::encode(class_map, bl);
1831 ::encode(class_name, bl);
1832 ::encode(class_bucket, bl);
1833
224ce89b
WB
1834 __u32 size = (__u32)choose_args.size();
1835 ::encode(size, bl);
7c673cae
FG
1836 for (auto c : choose_args) {
1837 ::encode(c.first, bl);
1838 crush_choose_arg_map arg_map = c.second;
224ce89b 1839 size = 0;
7c673cae
FG
1840 for (__u32 i = 0; i < arg_map.size; i++) {
1841 crush_choose_arg *arg = &arg_map.args[i];
1842 if (arg->weight_set_size == 0 &&
1843 arg->ids_size == 0)
1844 continue;
1845 size++;
1846 }
1847 ::encode(size, bl);
1848 for (__u32 i = 0; i < arg_map.size; i++) {
1849 crush_choose_arg *arg = &arg_map.args[i];
1850 if (arg->weight_set_size == 0 &&
1851 arg->ids_size == 0)
1852 continue;
1853 ::encode(i, bl);
1854 ::encode(arg->weight_set_size, bl);
1855 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1856 crush_weight_set *weight_set = &arg->weight_set[j];
1857 ::encode(weight_set->size, bl);
1858 for (__u32 k = 0; k < weight_set->size; k++)
1859 ::encode(weight_set->weights[k], bl);
1860 }
1861 ::encode(arg->ids_size, bl);
1862 for (__u32 j = 0; j < arg->ids_size; j++)
1863 ::encode(arg->ids[j], bl);
1864 }
1865 }
1866 }
1867}
1868
1869static void decode_32_or_64_string_map(map<int32_t,string>& m, bufferlist::iterator& blp)
1870{
1871 m.clear();
1872 __u32 n;
1873 ::decode(n, blp);
1874 while (n--) {
1875 __s32 key;
1876 ::decode(key, blp);
1877
1878 __u32 strlen;
1879 ::decode(strlen, blp);
1880 if (strlen == 0) {
1881 // der, key was actually 64-bits!
1882 ::decode(strlen, blp);
1883 }
1884 ::decode_nohead(strlen, m[key], blp);
1885 }
1886}
1887
1888void CrushWrapper::decode(bufferlist::iterator& blp)
1889{
1890 create();
1891
1892 __u32 magic;
1893 ::decode(magic, blp);
1894 if (magic != CRUSH_MAGIC)
1895 throw buffer::malformed_input("bad magic number");
1896
1897 ::decode(crush->max_buckets, blp);
1898 ::decode(crush->max_rules, blp);
1899 ::decode(crush->max_devices, blp);
1900
1901 // legacy tunables, unless we decode something newer
1902 set_tunables_legacy();
1903
1904 try {
1905 // buckets
1906 crush->buckets = (crush_bucket**)calloc(1, crush->max_buckets * sizeof(crush_bucket*));
1907 for (int i=0; i<crush->max_buckets; i++) {
1908 decode_crush_bucket(&crush->buckets[i], blp);
1909 }
1910
1911 // rules
1912 crush->rules = (crush_rule**)calloc(1, crush->max_rules * sizeof(crush_rule*));
1913 for (unsigned i = 0; i < crush->max_rules; ++i) {
1914 __u32 yes;
1915 ::decode(yes, blp);
1916 if (!yes) {
1917 crush->rules[i] = NULL;
1918 continue;
1919 }
1920
1921 __u32 len;
1922 ::decode(len, blp);
1923 crush->rules[i] = reinterpret_cast<crush_rule*>(calloc(1, crush_rule_size(len)));
1924 crush->rules[i]->len = len;
1925 ::decode(crush->rules[i]->mask, blp);
1926 for (unsigned j=0; j<crush->rules[i]->len; j++)
1927 ::decode(crush->rules[i]->steps[j], blp);
1928 }
1929
1930 // name info
1931 // NOTE: we had a bug where we were incoding int instead of int32, which means the
1932 // 'key' field for these maps may be either 32 or 64 bits, depending. tolerate
1933 // both by assuming the string is always non-empty.
1934 decode_32_or_64_string_map(type_map, blp);
1935 decode_32_or_64_string_map(name_map, blp);
1936 decode_32_or_64_string_map(rule_name_map, blp);
1937
1938 // tunables
1939 if (!blp.end()) {
1940 ::decode(crush->choose_local_tries, blp);
1941 ::decode(crush->choose_local_fallback_tries, blp);
1942 ::decode(crush->choose_total_tries, blp);
1943 }
1944 if (!blp.end()) {
1945 ::decode(crush->chooseleaf_descend_once, blp);
1946 }
1947 if (!blp.end()) {
1948 ::decode(crush->chooseleaf_vary_r, blp);
1949 }
1950 if (!blp.end()) {
1951 ::decode(crush->straw_calc_version, blp);
1952 }
1953 if (!blp.end()) {
1954 ::decode(crush->allowed_bucket_algs, blp);
1955 }
1956 if (!blp.end()) {
1957 ::decode(crush->chooseleaf_stable, blp);
1958 }
1959 if (!blp.end()) {
1960 ::decode(class_map, blp);
1961 ::decode(class_name, blp);
1962 for (auto &c : class_name)
1963 class_rname[c.second] = c.first;
1964 ::decode(class_bucket, blp);
1965 cleanup_classes();
1966 }
1967 if (!blp.end()) {
224ce89b 1968 __u32 choose_args_size;
7c673cae 1969 ::decode(choose_args_size, blp);
224ce89b 1970 for (__u32 i = 0; i < choose_args_size; i++) {
7c673cae
FG
1971 uint64_t choose_args_index;
1972 ::decode(choose_args_index, blp);
1973 crush_choose_arg_map arg_map;
1974 arg_map.size = crush->max_buckets;
1975 arg_map.args = (crush_choose_arg*)calloc(arg_map.size, sizeof(crush_choose_arg));
1976 __u32 size;
1977 ::decode(size, blp);
1978 for (__u32 j = 0; j < size; j++) {
1979 __u32 bucket_index;
1980 ::decode(bucket_index, blp);
1981 assert(bucket_index < arg_map.size);
1982 crush_choose_arg *arg = &arg_map.args[bucket_index];
1983 ::decode(arg->weight_set_size, blp);
1984 arg->weight_set = (crush_weight_set*)calloc(arg->weight_set_size, sizeof(crush_weight_set));
1985 for (__u32 k = 0; k < arg->weight_set_size; k++) {
1986 crush_weight_set *weight_set = &arg->weight_set[k];
1987 ::decode(weight_set->size, blp);
1988 weight_set->weights = (__u32*)calloc(weight_set->size, sizeof(__u32));
1989 for (__u32 l = 0; l < weight_set->size; l++)
1990 ::decode(weight_set->weights[l], blp);
1991 }
1992 ::decode(arg->ids_size, blp);
224ce89b 1993 arg->ids = (__s32 *)calloc(arg->ids_size, sizeof(__s32));
7c673cae
FG
1994 for (__u32 k = 0; k < arg->ids_size; k++)
1995 ::decode(arg->ids[k], blp);
1996 }
1997 choose_args[choose_args_index] = arg_map;
1998 }
1999 }
2000 finalize();
2001 }
2002 catch (...) {
2003 crush_destroy(crush);
2004 throw;
2005 }
2006}
2007
2008void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp)
2009{
2010 __u32 alg;
2011 ::decode(alg, blp);
2012 if (!alg) {
2013 *bptr = NULL;
2014 return;
2015 }
2016
2017 int size = 0;
2018 switch (alg) {
2019 case CRUSH_BUCKET_UNIFORM:
2020 size = sizeof(crush_bucket_uniform);
2021 break;
2022 case CRUSH_BUCKET_LIST:
2023 size = sizeof(crush_bucket_list);
2024 break;
2025 case CRUSH_BUCKET_TREE:
2026 size = sizeof(crush_bucket_tree);
2027 break;
2028 case CRUSH_BUCKET_STRAW:
2029 size = sizeof(crush_bucket_straw);
2030 break;
2031 case CRUSH_BUCKET_STRAW2:
2032 size = sizeof(crush_bucket_straw2);
2033 break;
2034 default:
2035 {
2036 char str[128];
2037 snprintf(str, sizeof(str), "unsupported bucket algorithm: %d", alg);
2038 throw buffer::malformed_input(str);
2039 }
2040 }
2041 crush_bucket *bucket = reinterpret_cast<crush_bucket*>(calloc(1, size));
2042 *bptr = bucket;
2043
2044 ::decode(bucket->id, blp);
2045 ::decode(bucket->type, blp);
2046 ::decode(bucket->alg, blp);
2047 ::decode(bucket->hash, blp);
2048 ::decode(bucket->weight, blp);
2049 ::decode(bucket->size, blp);
2050
2051 bucket->items = (__s32*)calloc(1, bucket->size * sizeof(__s32));
2052 for (unsigned j = 0; j < bucket->size; ++j) {
2053 ::decode(bucket->items[j], blp);
2054 }
2055
2056 switch (bucket->alg) {
2057 case CRUSH_BUCKET_UNIFORM:
2058 ::decode((reinterpret_cast<crush_bucket_uniform*>(bucket))->item_weight, blp);
2059 break;
2060
2061 case CRUSH_BUCKET_LIST: {
2062 crush_bucket_list* cbl = reinterpret_cast<crush_bucket_list*>(bucket);
2063 cbl->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2064 cbl->sum_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2065
2066 for (unsigned j = 0; j < bucket->size; ++j) {
2067 ::decode(cbl->item_weights[j], blp);
2068 ::decode(cbl->sum_weights[j], blp);
2069 }
2070 break;
2071 }
2072
2073 case CRUSH_BUCKET_TREE: {
2074 crush_bucket_tree* cbt = reinterpret_cast<crush_bucket_tree*>(bucket);
2075 ::decode(cbt->num_nodes, blp);
2076 cbt->node_weights = (__u32*)calloc(1, cbt->num_nodes * sizeof(__u32));
2077 for (unsigned j=0; j<cbt->num_nodes; j++) {
2078 ::decode(cbt->node_weights[j], blp);
2079 }
2080 break;
2081 }
2082
2083 case CRUSH_BUCKET_STRAW: {
2084 crush_bucket_straw* cbs = reinterpret_cast<crush_bucket_straw*>(bucket);
2085 cbs->straws = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2086 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2087 for (unsigned j = 0; j < bucket->size; ++j) {
2088 ::decode(cbs->item_weights[j], blp);
2089 ::decode(cbs->straws[j], blp);
2090 }
2091 break;
2092 }
2093
2094 case CRUSH_BUCKET_STRAW2: {
2095 crush_bucket_straw2* cbs = reinterpret_cast<crush_bucket_straw2*>(bucket);
2096 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
2097 for (unsigned j = 0; j < bucket->size; ++j) {
2098 ::decode(cbs->item_weights[j], blp);
2099 }
2100 break;
2101 }
2102
2103 default:
2104 // We should have handled this case in the first switch statement
2105 ceph_abort();
2106 break;
2107 }
2108}
2109
2110
2111void CrushWrapper::dump(Formatter *f) const
2112{
2113 f->open_array_section("devices");
2114 for (int i=0; i<get_max_devices(); i++) {
2115 f->open_object_section("device");
2116 f->dump_int("id", i);
2117 const char *n = get_item_name(i);
2118 if (n) {
2119 f->dump_string("name", n);
2120 } else {
2121 char name[20];
2122 sprintf(name, "device%d", i);
2123 f->dump_string("name", name);
2124 }
2125 const char *device_class = get_item_class(i);
2126 if (device_class != NULL)
2127 f->dump_string("class", device_class);
2128 f->close_section();
2129 }
2130 f->close_section();
2131
2132 f->open_array_section("types");
2133 int n = get_num_type_names();
2134 for (int i=0; n; i++) {
2135 const char *name = get_type_name(i);
2136 if (!name) {
2137 if (i == 0) {
2138 f->open_object_section("type");
2139 f->dump_int("type_id", 0);
2140 f->dump_string("name", "device");
2141 f->close_section();
2142 }
2143 continue;
2144 }
2145 n--;
2146 f->open_object_section("type");
2147 f->dump_int("type_id", i);
2148 f->dump_string("name", name);
2149 f->close_section();
2150 }
2151 f->close_section();
2152
2153 f->open_array_section("buckets");
2154 for (int bucket = -1; bucket > -1-get_max_buckets(); --bucket) {
2155 if (!bucket_exists(bucket))
2156 continue;
2157 f->open_object_section("bucket");
2158 f->dump_int("id", bucket);
2159 if (get_item_name(bucket))
2160 f->dump_string("name", get_item_name(bucket));
2161 f->dump_int("type_id", get_bucket_type(bucket));
2162 if (get_type_name(get_bucket_type(bucket)))
2163 f->dump_string("type_name", get_type_name(get_bucket_type(bucket)));
2164 f->dump_int("weight", get_bucket_weight(bucket));
2165 f->dump_string("alg", crush_bucket_alg_name(get_bucket_alg(bucket)));
2166 f->dump_string("hash", crush_hash_name(get_bucket_hash(bucket)));
2167 f->open_array_section("items");
2168 for (int j=0; j<get_bucket_size(bucket); j++) {
2169 f->open_object_section("item");
2170 f->dump_int("id", get_bucket_item(bucket, j));
2171 f->dump_int("weight", get_bucket_item_weight(bucket, j));
2172 f->dump_int("pos", j);
2173 f->close_section();
2174 }
2175 f->close_section();
2176 f->close_section();
2177 }
2178 f->close_section();
2179
2180 f->open_array_section("rules");
2181 dump_rules(f);
2182 f->close_section();
2183
2184 f->open_object_section("tunables");
2185 dump_tunables(f);
2186 f->close_section();
2187
2188 dump_choose_args(f);
2189}
2190
2191namespace {
2192 // depth first walker
2193 class TreeDumper {
2194 typedef CrushTreeDumper::Item Item;
2195 const CrushWrapper *crush;
2196 public:
2197 explicit TreeDumper(const CrushWrapper *crush)
2198 : crush(crush) {}
2199
2200 void dump(Formatter *f) {
2201 set<int> roots;
2202 crush->find_roots(roots);
2203 for (set<int>::iterator root = roots.begin(); root != roots.end(); ++root) {
2204 dump_item(Item(*root, 0, crush->get_bucket_weightf(*root)), f);
2205 }
2206 }
2207
2208 private:
2209 void dump_item(const Item& qi, Formatter* f) {
2210 if (qi.is_bucket()) {
2211 f->open_object_section("bucket");
2212 CrushTreeDumper::dump_item_fields(crush, qi, f);
2213 dump_bucket_children(qi, f);
2214 f->close_section();
2215 } else {
2216 f->open_object_section("device");
2217 CrushTreeDumper::dump_item_fields(crush, qi, f);
2218 f->close_section();
2219 }
2220 }
2221
2222 void dump_bucket_children(const Item& parent, Formatter* f) {
2223 f->open_array_section("items");
2224 const int max_pos = crush->get_bucket_size(parent.id);
2225 for (int pos = 0; pos < max_pos; pos++) {
2226 int id = crush->get_bucket_item(parent.id, pos);
2227 float weight = crush->get_bucket_item_weightf(parent.id, pos);
2228 dump_item(Item(id, parent.depth + 1, weight), f);
2229 }
2230 f->close_section();
2231 }
2232 };
2233}
2234
2235void CrushWrapper::dump_tree(Formatter *f) const
2236{
2237 assert(f);
2238 TreeDumper(this).dump(f);
2239}
2240
2241void CrushWrapper::dump_tunables(Formatter *f) const
2242{
2243 f->dump_int("choose_local_tries", get_choose_local_tries());
2244 f->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries());
2245 f->dump_int("choose_total_tries", get_choose_total_tries());
2246 f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
2247 f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
2248 f->dump_int("chooseleaf_stable", get_chooseleaf_stable());
2249 f->dump_int("straw_calc_version", get_straw_calc_version());
2250 f->dump_int("allowed_bucket_algs", get_allowed_bucket_algs());
2251
2252 // be helpful about it
2253 if (has_jewel_tunables())
2254 f->dump_string("profile", "jewel");
2255 else if (has_hammer_tunables())
2256 f->dump_string("profile", "hammer");
2257 else if (has_firefly_tunables())
2258 f->dump_string("profile", "firefly");
2259 else if (has_bobtail_tunables())
2260 f->dump_string("profile", "bobtail");
2261 else if (has_argonaut_tunables())
2262 f->dump_string("profile", "argonaut");
2263 else
2264 f->dump_string("profile", "unknown");
2265 f->dump_int("optimal_tunables", (int)has_optimal_tunables());
2266 f->dump_int("legacy_tunables", (int)has_legacy_tunables());
2267
2268 // be helpful about minimum version required
2269 f->dump_string("minimum_required_version", get_min_required_version());
2270
2271 f->dump_int("require_feature_tunables", (int)has_nondefault_tunables());
2272 f->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2());
2273 f->dump_int("has_v2_rules", (int)has_v2_rules());
2274 f->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3());
2275 f->dump_int("has_v3_rules", (int)has_v3_rules());
2276 f->dump_int("has_v4_buckets", (int)has_v4_buckets());
2277 f->dump_int("require_feature_tunables5", (int)has_nondefault_tunables5());
2278 f->dump_int("has_v5_rules", (int)has_v5_rules());
2279}
2280
2281void CrushWrapper::dump_choose_args(Formatter *f) const
2282{
2283 f->open_object_section("choose_args");
2284 for (auto c : choose_args) {
2285 crush_choose_arg_map arg_map = c.second;
2286 f->open_array_section(stringify(c.first).c_str());
2287 for (__u32 i = 0; i < arg_map.size; i++) {
2288 crush_choose_arg *arg = &arg_map.args[i];
2289 if (arg->weight_set_size == 0 &&
2290 arg->ids_size == 0)
2291 continue;
2292 f->open_object_section("choose_args");
2293 int bucket_index = i;
2294 f->dump_int("bucket_id", -1-bucket_index);
2295 if (arg->weight_set_size > 0) {
2296 f->open_array_section("weight_set");
2297 for (__u32 j = 0; j < arg->weight_set_size; j++) {
2298 f->open_array_section("weights");
2299 __u32 *weights = arg->weight_set[j].weights;
2300 __u32 size = arg->weight_set[j].size;
2301 for (__u32 k = 0; k < size; k++) {
2302 f->dump_float("weight", (float)weights[k]/(float)0x10000);
2303 }
2304 f->close_section();
2305 }
2306 f->close_section();
2307 }
2308 if (arg->ids_size > 0) {
2309 f->open_array_section("ids");
2310 for (__u32 j = 0; j < arg->ids_size; j++)
2311 f->dump_int("id", arg->ids[j]);
2312 f->close_section();
2313 }
2314 f->close_section();
2315 }
2316 f->close_section();
2317 }
2318 f->close_section();
2319}
2320
2321void CrushWrapper::dump_rules(Formatter *f) const
2322{
2323 for (int i=0; i<get_max_rules(); i++) {
2324 if (!rule_exists(i))
2325 continue;
2326 dump_rule(i, f);
2327 }
2328}
2329
2330void CrushWrapper::dump_rule(int ruleset, Formatter *f) const
2331{
2332 f->open_object_section("rule");
2333 f->dump_int("rule_id", ruleset);
2334 if (get_rule_name(ruleset))
2335 f->dump_string("rule_name", get_rule_name(ruleset));
2336 f->dump_int("ruleset", get_rule_mask_ruleset(ruleset));
2337 f->dump_int("type", get_rule_mask_type(ruleset));
2338 f->dump_int("min_size", get_rule_mask_min_size(ruleset));
2339 f->dump_int("max_size", get_rule_mask_max_size(ruleset));
2340 f->open_array_section("steps");
2341 for (int j=0; j<get_rule_len(ruleset); j++) {
2342 f->open_object_section("step");
2343 switch (get_rule_op(ruleset, j)) {
2344 case CRUSH_RULE_NOOP:
2345 f->dump_string("op", "noop");
2346 break;
2347 case CRUSH_RULE_TAKE:
2348 f->dump_string("op", "take");
2349 {
2350 int item = get_rule_arg1(ruleset, j);
2351 f->dump_int("item", item);
2352
2353 const char *name = get_item_name(item);
2354 f->dump_string("item_name", name ? name : "");
2355 }
2356 break;
2357 case CRUSH_RULE_EMIT:
2358 f->dump_string("op", "emit");
2359 break;
2360 case CRUSH_RULE_CHOOSE_FIRSTN:
2361 f->dump_string("op", "choose_firstn");
2362 f->dump_int("num", get_rule_arg1(ruleset, j));
2363 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2364 break;
2365 case CRUSH_RULE_CHOOSE_INDEP:
2366 f->dump_string("op", "choose_indep");
2367 f->dump_int("num", get_rule_arg1(ruleset, j));
2368 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2369 break;
2370 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
2371 f->dump_string("op", "chooseleaf_firstn");
2372 f->dump_int("num", get_rule_arg1(ruleset, j));
2373 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2374 break;
2375 case CRUSH_RULE_CHOOSELEAF_INDEP:
2376 f->dump_string("op", "chooseleaf_indep");
2377 f->dump_int("num", get_rule_arg1(ruleset, j));
2378 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2379 break;
2380 case CRUSH_RULE_SET_CHOOSE_TRIES:
2381 f->dump_string("op", "set_choose_tries");
2382 f->dump_int("num", get_rule_arg1(ruleset, j));
2383 break;
2384 case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
2385 f->dump_string("op", "set_chooseleaf_tries");
2386 f->dump_int("num", get_rule_arg1(ruleset, j));
2387 break;
2388 default:
2389 f->dump_int("opcode", get_rule_op(ruleset, j));
2390 f->dump_int("arg1", get_rule_arg1(ruleset, j));
2391 f->dump_int("arg2", get_rule_arg2(ruleset, j));
2392 }
2393 f->close_section();
2394 }
2395 f->close_section();
2396 f->close_section();
2397}
2398
2399void CrushWrapper::list_rules(Formatter *f) const
2400{
2401 for (int rule = 0; rule < get_max_rules(); rule++) {
2402 if (!rule_exists(rule))
2403 continue;
2404 f->dump_string("name", get_rule_name(rule));
2405 }
2406}
2407
2408class CrushTreePlainDumper : public CrushTreeDumper::Dumper<ostream> {
2409public:
2410 typedef CrushTreeDumper::Dumper<ostream> Parent;
2411
2412 explicit CrushTreePlainDumper(const CrushWrapper *crush)
2413 : Parent(crush) {}
2414
2415 void dump(ostream *out) {
2416 *out << "ID\tWEIGHT\tTYPE NAME\n";
2417 Parent::dump(out);
2418 }
2419
2420protected:
2421 void dump_item(const CrushTreeDumper::Item &qi, ostream *out) override {
2422 *out << qi.id << "\t"
2423 << weightf_t(qi.weight) << "\t";
2424
2425 for (int k=0; k < qi.depth; k++)
2426 *out << "\t";
2427
2428 if (qi.is_bucket())
2429 {
2430 *out << crush->get_type_name(crush->get_bucket_type(qi.id)) << " "
2431 << crush->get_item_name(qi.id);
2432 }
2433 else
2434 {
2435 *out << "osd." << qi.id;
2436 }
2437 *out << "\n";
2438 }
2439};
2440
2441
2442class CrushTreeFormattingDumper : public CrushTreeDumper::FormattingDumper {
2443public:
2444 typedef CrushTreeDumper::FormattingDumper Parent;
2445
2446 explicit CrushTreeFormattingDumper(const CrushWrapper *crush)
2447 : Parent(crush) {}
2448
2449 void dump(Formatter *f) {
2450 f->open_array_section("nodes");
2451 Parent::dump(f);
2452 f->close_section();
2453 f->open_array_section("stray");
2454 f->close_section();
2455 }
2456};
2457
2458
2459void CrushWrapper::dump_tree(ostream *out, Formatter *f) const
2460{
2461 if (out)
2462 CrushTreePlainDumper(this).dump(out);
2463 if (f)
2464 CrushTreeFormattingDumper(this).dump(f);
2465}
2466
2467void CrushWrapper::generate_test_instances(list<CrushWrapper*>& o)
2468{
2469 o.push_back(new CrushWrapper);
2470 // fixme
2471}
2472
7c673cae
FG
2473/**
2474 * Determine the default CRUSH ruleset ID to be used with
2475 * newly created replicated pools.
2476 *
2477 * @returns a ruleset ID (>=0) or -1 if no suitable ruleset found
2478 */
2479int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext *cct)
2480{
31f18b77
FG
2481 int crush_ruleset = cct->_conf->osd_pool_default_crush_rule;
2482 if (crush_ruleset < 0) {
7c673cae
FG
2483 crush_ruleset = find_first_ruleset(pg_pool_t::TYPE_REPLICATED);
2484 } else if (!ruleset_exists(crush_ruleset)) {
2485 crush_ruleset = -1; // match find_first_ruleset() retval
2486 }
7c673cae
FG
2487 return crush_ruleset;
2488}
2489
2490bool CrushWrapper::is_valid_crush_name(const string& s)
2491{
2492 if (s.empty())
2493 return false;
2494 for (string::const_iterator p = s.begin(); p != s.end(); ++p) {
2495 if (!(*p == '-') &&
2496 !(*p == '_') &&
2497 !(*p == '.') &&
2498 !(*p >= '0' && *p <= '9') &&
2499 !(*p >= 'A' && *p <= 'Z') &&
2500 !(*p >= 'a' && *p <= 'z'))
2501 return false;
2502 }
2503 return true;
2504}
2505
2506bool CrushWrapper::is_valid_crush_loc(CephContext *cct,
2507 const map<string,string>& loc)
2508{
2509 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
2510 if (!is_valid_crush_name(l->first) ||
2511 !is_valid_crush_name(l->second)) {
2512 ldout(cct, 1) << "loc["
2513 << l->first << "] = '"
2514 << l->second << "' not a valid crush name ([A-Za-z0-9_-.]+)"
2515 << dendl;
2516 return false;
2517 }
2518 }
2519 return true;
2520}
2521
2522int CrushWrapper::_choose_type_stack(
2523 CephContext *cct,
2524 const vector<pair<int,int>>& stack,
2525 const set<int>& overfull,
2526 const vector<int>& underfull,
2527 const vector<int>& orig,
2528 vector<int>::const_iterator& i,
2529 set<int>& used,
2530 vector<int> *pw) const
2531{
2532 vector<int> w = *pw;
2533 vector<int> o;
2534
2535 ldout(cct, 10) << __func__ << " stack " << stack
2536 << " orig " << orig
2537 << " at " << *i
2538 << " pw " << *pw
2539 << dendl;
2540
2541 vector<int> cumulative_fanout(stack.size());
2542 int f = 1;
2543 for (int j = (int)stack.size() - 1; j >= 0; --j) {
2544 cumulative_fanout[j] = f;
2545 f *= stack[j].second;
2546 }
2547 ldout(cct, 10) << __func__ << " cumulative_fanout " << cumulative_fanout
2548 << dendl;
2549
31f18b77
FG
2550 // identify underful targets for each intermediate level.
2551 // this serves two purposes:
2552 // 1. we can tell when we are selecting a bucket that does not have any underfull
2553 // devices beneath it. that means that if the current input includes an overfull
2554 // device, we won't be able to find an underfull device with this parent to
2555 // swap for it.
2556 // 2. when we decide we should reject a bucket due to the above, this list gives us
2557 // a list of peers to consider that *do* have underfull devices available.. (we
2558 // are careful to pick one that has the same parent.)
2559 vector<set<int>> underfull_buckets; // level -> set of buckets with >0 underfull item(s)
2560 underfull_buckets.resize(stack.size() - 1);
2561 for (auto osd : underfull) {
2562 int item = osd;
2563 for (int j = (int)stack.size() - 2; j >= 0; --j) {
2564 int type = stack[j].first;
2565 item = get_parent_of_type(item, type);
2566 ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type
2567 << " is " << item << dendl;
2568 underfull_buckets[j].insert(item);
2569 }
2570 }
2571 ldout(cct, 20) << __func__ << " underfull_buckets " << underfull_buckets << dendl;
2572
7c673cae
FG
2573 for (unsigned j = 0; j < stack.size(); ++j) {
2574 int type = stack[j].first;
2575 int fanout = stack[j].second;
2576 int cum_fanout = cumulative_fanout[j];
2577 ldout(cct, 10) << " level " << j << ": type " << type << " fanout " << fanout
2578 << " cumulative " << cum_fanout
2579 << " w " << w << dendl;
2580 vector<int> o;
2581 auto tmpi = i;
2582 for (auto from : w) {
2583 ldout(cct, 10) << " from " << from << dendl;
31f18b77
FG
2584 // identify leaves under each choice. we use this to check whether any of these
2585 // leaves are overfull. (if so, we need to make sure there are underfull candidates
2586 // to swap for them.)
2587 vector<set<int>> leaves;
2588 leaves.resize(fanout);
7c673cae
FG
2589 for (int pos = 0; pos < fanout; ++pos) {
2590 if (type > 0) {
2591 // non-leaf
31f18b77 2592 int item = get_parent_of_type(*tmpi, type);
7c673cae 2593 o.push_back(item);
7c673cae 2594 int n = cum_fanout;
31f18b77
FG
2595 while (n-- && tmpi != orig.end()) {
2596 leaves[pos].insert(*tmpi++);
2597 }
2598 ldout(cct, 10) << __func__ << " from " << *tmpi << " got " << item
2599 << " of type " << type << " over leaves " << leaves[pos] << dendl;
7c673cae
FG
2600 } else {
2601 // leaf
2602 bool replaced = false;
2603 if (overfull.count(*i)) {
2604 for (auto item : underfull) {
2605 ldout(cct, 10) << __func__ << " pos " << pos
2606 << " was " << *i << " considering " << item
2607 << dendl;
2608 if (used.count(item)) {
2609 ldout(cct, 20) << __func__ << " in used " << used << dendl;
2610 continue;
2611 }
2612 if (!subtree_contains(from, item)) {
2613 ldout(cct, 20) << __func__ << " not in subtree " << from << dendl;
2614 continue;
2615 }
2616 if (std::find(orig.begin(), orig.end(), item) != orig.end()) {
2617 ldout(cct, 20) << __func__ << " in orig " << orig << dendl;
2618 continue;
2619 }
2620 o.push_back(item);
2621 used.insert(item);
2622 ldout(cct, 10) << __func__ << " pos " << pos << " replace "
2623 << *i << " -> " << item << dendl;
2624 replaced = true;
2625 ++i;
2626 break;
2627 }
2628 }
2629 if (!replaced) {
2630 ldout(cct, 10) << __func__ << " pos " << pos << " keep " << *i
2631 << dendl;
2632 o.push_back(*i);
2633 ++i;
2634 }
2635 if (i == orig.end()) {
2636 ldout(cct, 10) << __func__ << " end of orig, break 1" << dendl;
2637 break;
2638 }
2639 }
2640 }
31f18b77
FG
2641 if (j + 1 < stack.size()) {
2642 // check if any buckets have overfull leaves but no underfull candidates
2643 for (int pos = 0; pos < fanout; ++pos) {
2644 if (underfull_buckets[j].count(o[pos]) == 0) {
2645 // are any leaves overfull?
2646 bool any_overfull = false;
2647 for (auto osd : leaves[pos]) {
2648 if (overfull.count(osd)) {
2649 any_overfull = true;
2650 }
2651 }
2652 if (any_overfull) {
2653 ldout(cct, 10) << " bucket " << o[pos] << " has no underfull targets and "
2654 << ">0 leaves " << leaves[pos] << " is overfull; alts "
2655 << underfull_buckets[j]
2656 << dendl;
2657 for (auto alt : underfull_buckets[j]) {
2658 if (std::find(o.begin(), o.end(), alt) == o.end()) {
2659 // see if alt has the same parent
2660 if (j == 0 ||
2661 get_parent_of_type(o[pos], stack[j-1].first) ==
2662 get_parent_of_type(alt, stack[j-1].first)) {
2663 if (j)
2664 ldout(cct, 10) << " replacing " << o[pos]
2665 << " (which has no underfull leaves) with " << alt
2666 << " (same parent "
2667 << get_parent_of_type(alt, stack[j-1].first) << " type "
2668 << type << ")" << dendl;
2669 else
2670 ldout(cct, 10) << " replacing " << o[pos]
2671 << " (which has no underfull leaves) with " << alt
2672 << " (first level)" << dendl;
2673 o[pos] = alt;
2674 break;
2675 } else {
2676 ldout(cct, 30) << " alt " << alt << " for " << o[pos]
2677 << " has different parent, skipping" << dendl;
2678 }
2679 }
2680 }
2681 }
2682 }
2683 }
2684 }
7c673cae
FG
2685 if (i == orig.end()) {
2686 ldout(cct, 10) << __func__ << " end of orig, break 2" << dendl;
2687 break;
2688 }
2689 }
2690 ldout(cct, 10) << __func__ << " w <- " << o << " was " << w << dendl;
2691 w.swap(o);
2692 }
2693 *pw = w;
2694 return 0;
2695}
2696
2697int CrushWrapper::try_remap_rule(
2698 CephContext *cct,
2699 int ruleno,
2700 int maxout,
2701 const set<int>& overfull,
2702 const vector<int>& underfull,
2703 const vector<int>& orig,
2704 vector<int> *out) const
2705{
2706 const crush_map *map = crush;
2707 const crush_rule *rule = get_rule(ruleno);
2708 assert(rule);
2709
2710 ldout(cct, 10) << __func__ << " ruleno " << ruleno
2711 << " numrep " << maxout << " overfull " << overfull
2712 << " underfull " << underfull << " orig " << orig
2713 << dendl;
2714 vector<int> w; // working set
2715 out->clear();
2716
2717 auto i = orig.begin();
2718 set<int> used;
2719
2720 vector<pair<int,int>> type_stack; // (type, fan-out)
2721
2722 for (unsigned step = 0; step < rule->len; ++step) {
2723 const crush_rule_step *curstep = &rule->steps[step];
2724 ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl;
2725 switch (curstep->op) {
2726 case CRUSH_RULE_TAKE:
2727 if ((curstep->arg1 >= 0 && curstep->arg1 < map->max_devices) ||
2728 (-1-curstep->arg1 >= 0 && -1-curstep->arg1 < map->max_buckets &&
2729 map->buckets[-1-curstep->arg1])) {
2730 w.clear();
2731 w.push_back(curstep->arg1);
2732 ldout(cct, 10) << __func__ << " take " << w << dendl;
2733 } else {
2734 ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl;
2735 }
2736 break;
2737
2738 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
2739 case CRUSH_RULE_CHOOSELEAF_INDEP:
2740 {
2741 int numrep = curstep->arg1;
2742 int type = curstep->arg2;
2743 if (numrep <= 0)
2744 numrep += maxout;
2745 type_stack.push_back(make_pair(type, numrep));
2746 type_stack.push_back(make_pair(0, 1));
2747 int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
2748 i, used, &w);
2749 if (r < 0)
2750 return r;
2751 type_stack.clear();
2752 }
2753 break;
2754
2755 case CRUSH_RULE_CHOOSE_FIRSTN:
2756 case CRUSH_RULE_CHOOSE_INDEP:
2757 {
2758 int numrep = curstep->arg1;
2759 int type = curstep->arg2;
2760 if (numrep <= 0)
2761 numrep += maxout;
2762 type_stack.push_back(make_pair(type, numrep));
2763 }
2764 break;
2765
2766 case CRUSH_RULE_EMIT:
2767 ldout(cct, 10) << " emit " << w << dendl;
2768 if (!type_stack.empty()) {
2769 int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
2770 i, used, &w);
2771 if (r < 0)
2772 return r;
2773 type_stack.clear();
2774 }
2775 for (auto item : w) {
2776 out->push_back(item);
2777 }
2778 w.clear();
2779 break;
2780
2781 default:
2782 // ignore
2783 break;
2784 }
2785 }
2786
2787 return 0;
2788}