]> git.proxmox.com Git - ceph.git/blame - ceph/src/crush/CrushWrapper.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / crush / CrushWrapper.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "osd/osd_types.h"
5#include "common/debug.h"
6#include "common/Formatter.h"
7#include "common/errno.h"
8#include "include/stringify.h"
9
10#include "CrushWrapper.h"
11#include "CrushTreeDumper.h"
12
13#define dout_subsys ceph_subsys_crush
14
15bool CrushWrapper::has_v2_rules() const
16{
17 for (unsigned i=0; i<crush->max_rules; i++) {
18 if (is_v2_rule(i)) {
19 return true;
20 }
21 }
22 return false;
23}
24
25bool CrushWrapper::is_v2_rule(unsigned ruleid) const
26{
27 // check rule for use of indep or new SET_* rule steps
28 if (ruleid >= crush->max_rules)
29 return false;
30 crush_rule *r = crush->rules[ruleid];
31 if (!r)
32 return false;
33 for (unsigned j=0; j<r->len; j++) {
34 if (r->steps[j].op == CRUSH_RULE_CHOOSE_INDEP ||
35 r->steps[j].op == CRUSH_RULE_CHOOSELEAF_INDEP ||
36 r->steps[j].op == CRUSH_RULE_SET_CHOOSE_TRIES ||
37 r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_TRIES) {
38 return true;
39 }
40 }
41 return false;
42}
43
44bool CrushWrapper::has_v3_rules() const
45{
46 for (unsigned i=0; i<crush->max_rules; i++) {
47 if (is_v3_rule(i)) {
48 return true;
49 }
50 }
51 return false;
52}
53
54bool CrushWrapper::is_v3_rule(unsigned ruleid) const
55{
56 // check rule for use of SET_CHOOSELEAF_VARY_R step
57 if (ruleid >= crush->max_rules)
58 return false;
59 crush_rule *r = crush->rules[ruleid];
60 if (!r)
61 return false;
62 for (unsigned j=0; j<r->len; j++) {
63 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_VARY_R) {
64 return true;
65 }
66 }
67 return false;
68}
69
70bool CrushWrapper::has_v4_buckets() const
71{
72 for (int i=0; i<crush->max_buckets; ++i) {
73 crush_bucket *b = crush->buckets[i];
74 if (!b)
75 continue;
76 if (b->alg == CRUSH_BUCKET_STRAW2)
77 return true;
78 }
79 return false;
80}
81
82bool CrushWrapper::has_v5_rules() const
83{
84 for (unsigned i=0; i<crush->max_rules; i++) {
85 if (is_v5_rule(i)) {
86 return true;
87 }
88 }
89 return false;
90}
91
92bool CrushWrapper::is_v5_rule(unsigned ruleid) const
93{
94 // check rule for use of SET_CHOOSELEAF_STABLE step
95 if (ruleid >= crush->max_rules)
96 return false;
97 crush_rule *r = crush->rules[ruleid];
98 if (!r)
99 return false;
100 for (unsigned j=0; j<r->len; j++) {
101 if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_STABLE) {
102 return true;
103 }
104 }
105 return false;
106}
107
108bool CrushWrapper::has_chooseargs() const
109{
110 return !choose_args.empty();
111}
112
113bool CrushWrapper::has_incompat_chooseargs() const
114{
115 // FIXME: if the chooseargs all have 1 position *and* do not remap IDs then
116 // we can fabricate a compatible crush map for legacy clients by swapping the
117 // choose_args weights in for the real weights. until then,
118 return has_chooseargs();
119}
120
121int CrushWrapper::split_id_class(int i, int *idout, int *classout) const
122{
123 if (!item_exists(i))
124 return -EINVAL;
125 string name = get_item_name(i);
126 size_t pos = name.find("~");
127 if (pos == string::npos) {
128 *idout = i;
129 *classout = -1;
130 return 0;
131 }
132 string name_no_class = name.substr(0, pos);
133 if (!name_exists(name_no_class))
134 return -ENOENT;
135 string class_name = name.substr(pos + 1);
136 if (!class_exists(class_name))
137 return -ENOENT;
138 *idout = get_item_id(name_no_class);
139 *classout = get_class_id(class_name);
140 return 0;
141}
142
143int CrushWrapper::can_rename_item(const string& srcname,
144 const string& dstname,
145 ostream *ss) const
146{
147 if (name_exists(srcname)) {
148 if (name_exists(dstname)) {
149 *ss << "dstname = '" << dstname << "' already exists";
150 return -EEXIST;
151 }
152 if (is_valid_crush_name(dstname)) {
153 return 0;
154 } else {
155 *ss << "dstname = '" << dstname << "' does not match [-_.0-9a-zA-Z]+";
156 return -EINVAL;
157 }
158 } else {
159 if (name_exists(dstname)) {
160 *ss << "srcname = '" << srcname << "' does not exist "
161 << "and dstname = '" << dstname << "' already exists";
162 return -EALREADY;
163 } else {
164 *ss << "srcname = '" << srcname << "' does not exist";
165 return -ENOENT;
166 }
167 }
168}
169
170int CrushWrapper::rename_item(const string& srcname,
171 const string& dstname,
172 ostream *ss)
173{
174 int ret = can_rename_item(srcname, dstname, ss);
175 if (ret < 0)
176 return ret;
177 int oldid = get_item_id(srcname);
178 return set_item_name(oldid, dstname);
179}
180
181int CrushWrapper::can_rename_bucket(const string& srcname,
182 const string& dstname,
183 ostream *ss) const
184{
185 int ret = can_rename_item(srcname, dstname, ss);
186 if (ret)
187 return ret;
188 int srcid = get_item_id(srcname);
189 if (srcid >= 0) {
190 *ss << "srcname = '" << srcname << "' is not a bucket "
191 << "because its id = " << srcid << " is >= 0";
192 return -ENOTDIR;
193 }
194 return 0;
195}
196
197int CrushWrapper::rename_bucket(const string& srcname,
198 const string& dstname,
199 ostream *ss)
200{
201 int ret = can_rename_bucket(srcname, dstname, ss);
202 if (ret < 0)
203 return ret;
204 int oldid = get_item_id(srcname);
205 return set_item_name(oldid, dstname);
206}
207
208void CrushWrapper::find_takes(set<int>& roots) const
209{
210 for (unsigned i=0; i<crush->max_rules; i++) {
211 crush_rule *r = crush->rules[i];
212 if (!r)
213 continue;
214 for (unsigned j=0; j<r->len; j++) {
215 if (r->steps[j].op == CRUSH_RULE_TAKE)
216 roots.insert(r->steps[j].arg1);
217 }
218 }
219}
220
221void CrushWrapper::find_roots(set<int>& roots) const
222{
223 for (int i = 0; i < crush->max_buckets; i++) {
224 if (!crush->buckets[i])
225 continue;
226 crush_bucket *b = crush->buckets[i];
227 if (!_search_item_exists(b->id))
228 roots.insert(b->id);
229 }
230}
231
232bool CrushWrapper::subtree_contains(int root, int item) const
233{
234 if (root == item)
235 return true;
236
237 if (root >= 0)
238 return false; // root is a leaf
239
240 const crush_bucket *b = get_bucket(root);
241 if (IS_ERR(b))
242 return false;
243
244 for (unsigned j=0; j<b->size; j++) {
245 if (subtree_contains(b->items[j], item))
246 return true;
247 }
248 return false;
249}
250
251bool CrushWrapper::_maybe_remove_last_instance(CephContext *cct, int item, bool unlink_only)
252{
253 // last instance?
254 if (_search_item_exists(item)) {
255 return false;
256 }
257 if (item < 0 && _bucket_is_in_use(item)) {
258 return false;
259 }
260
261 if (item < 0 && !unlink_only) {
262 crush_bucket *t = get_bucket(item);
263 ldout(cct, 5) << "_maybe_remove_last_instance removing bucket " << item << dendl;
264 crush_remove_bucket(crush, t);
265 if (class_bucket.count(item) != 0)
266 class_bucket.erase(item);
267 }
268 if ((item >= 0 || !unlink_only) && name_map.count(item)) {
269 ldout(cct, 5) << "_maybe_remove_last_instance removing name for item " << item << dendl;
270 name_map.erase(item);
271 have_rmaps = false;
272 }
273 return true;
274}
275
276int CrushWrapper::remove_root(int item, bool unused)
277{
278 if (unused && _bucket_is_in_use(item))
279 return 0;
280
281 crush_bucket *b = get_bucket(item);
282 if (IS_ERR(b))
283 return -ENOENT;
284
285 for (unsigned n = 0; n < b->size; n++) {
286 if (b->items[n] >= 0)
287 continue;
288 int r = remove_root(b->items[n], unused);
289 if (r < 0)
290 return r;
291 }
292
293 crush_remove_bucket(crush, b);
294 if (name_map.count(item) != 0) {
295 name_map.erase(item);
296 have_rmaps = false;
297 }
298 if (class_bucket.count(item) != 0)
299 class_bucket.erase(item);
300 return 0;
301}
302
303int CrushWrapper::remove_item(CephContext *cct, int item, bool unlink_only)
304{
305 if (choose_args.size() > 0) {
306 ldout(cct, 1) << "remove_item not implemented when choose_args is not empty" << dendl;
307 return -EDOM;
308 }
309
310 ldout(cct, 5) << "remove_item " << item << (unlink_only ? " unlink_only":"") << dendl;
311
312 int ret = -ENOENT;
313
314 if (item < 0 && !unlink_only) {
315 crush_bucket *t = get_bucket(item);
316 if (IS_ERR(t)) {
317 ldout(cct, 1) << "remove_item bucket " << item << " does not exist" << dendl;
318 return -ENOENT;
319 }
320
321 if (t->size) {
322 ldout(cct, 1) << "remove_item bucket " << item << " has " << t->size
323 << " items, not empty" << dendl;
324 return -ENOTEMPTY;
325 }
326 if (_bucket_is_in_use(item)) {
327 return -EBUSY;
328 }
329 }
330
331 for (int i = 0; i < crush->max_buckets; i++) {
332 if (!crush->buckets[i])
333 continue;
334 crush_bucket *b = crush->buckets[i];
335
336 for (unsigned i=0; i<b->size; ++i) {
337 int id = b->items[i];
338 if (id == item) {
339 ldout(cct, 5) << "remove_item removing item " << item
340 << " from bucket " << b->id << dendl;
341 crush_bucket_remove_item(crush, b, item);
342 adjust_item_weight(cct, b->id, b->weight);
343 ret = 0;
344 }
345 }
346 }
347
348 if (_maybe_remove_last_instance(cct, item, unlink_only))
349 ret = 0;
350
351 return ret;
352}
353
354bool CrushWrapper::_search_item_exists(int item) const
355{
356 for (int i = 0; i < crush->max_buckets; i++) {
357 if (!crush->buckets[i])
358 continue;
359 crush_bucket *b = crush->buckets[i];
360 for (unsigned j=0; j<b->size; ++j) {
361 if (b->items[j] == item)
362 return true;
363 }
364 }
365 return false;
366}
367
368bool CrushWrapper::_bucket_is_in_use(int item)
369{
370 for (auto &i : class_bucket)
371 for (auto &j : i.second)
372 if (j.second == item)
373 return true;
374 for (unsigned i = 0; i < crush->max_rules; ++i) {
375 crush_rule *r = crush->rules[i];
376 if (!r)
377 continue;
378 for (unsigned j = 0; j < r->len; ++j) {
379 if (r->steps[j].op == CRUSH_RULE_TAKE) {
380 int step_item = r->steps[j].arg1;
381 int original_item;
382 int c;
383 int res = split_id_class(step_item, &original_item, &c);
384 if (res < 0)
385 return false;
386 if (step_item == item || original_item == item)
387 return true;
388 }
389 }
390 }
391 return false;
392}
393
394int CrushWrapper::_remove_item_under(CephContext *cct, int item, int ancestor, bool unlink_only)
395{
396 ldout(cct, 5) << "_remove_item_under " << item << " under " << ancestor
397 << (unlink_only ? " unlink_only":"") << dendl;
398
399 if (ancestor >= 0) {
400 return -EINVAL;
401 }
402
403 if (!bucket_exists(ancestor))
404 return -EINVAL;
405
406 int ret = -ENOENT;
407
408 crush_bucket *b = get_bucket(ancestor);
409 for (unsigned i=0; i<b->size; ++i) {
410 int id = b->items[i];
411 if (id == item) {
412 ldout(cct, 5) << "_remove_item_under removing item " << item << " from bucket " << b->id << dendl;
413 crush_bucket_remove_item(crush, b, item);
414 adjust_item_weight(cct, b->id, b->weight);
415 ret = 0;
416 } else if (id < 0) {
417 int r = remove_item_under(cct, item, id, unlink_only);
418 if (r == 0)
419 ret = 0;
420 }
421 }
422 return ret;
423}
424
425int CrushWrapper::remove_item_under(CephContext *cct, int item, int ancestor, bool unlink_only)
426{
427 ldout(cct, 5) << "remove_item_under " << item << " under " << ancestor
428 << (unlink_only ? " unlink_only":"") << dendl;
429
430 if (!unlink_only && _bucket_is_in_use(item)) {
431 return -EBUSY;
432 }
433
434 int ret = _remove_item_under(cct, item, ancestor, unlink_only);
435 if (ret < 0)
436 return ret;
437
438 if (item < 0 && !unlink_only) {
439 crush_bucket *t = get_bucket(item);
440 if (IS_ERR(t)) {
441 ldout(cct, 1) << "remove_item_under bucket " << item
442 << " does not exist" << dendl;
443 return -ENOENT;
444 }
445
446 if (t->size) {
447 ldout(cct, 1) << "remove_item_under bucket " << item << " has " << t->size
448 << " items, not empty" << dendl;
449 return -ENOTEMPTY;
450 }
451 }
452
453 if (_maybe_remove_last_instance(cct, item, unlink_only))
454 ret = 0;
455
456 return ret;
457}
458
459int CrushWrapper::get_common_ancestor_distance(CephContext *cct, int id,
460 const std::multimap<string,string>& loc)
461{
462 ldout(cct, 5) << __func__ << " " << id << " " << loc << dendl;
463 if (!item_exists(id))
464 return -ENOENT;
465 map<string,string> id_loc = get_full_location(id);
466 ldout(cct, 20) << " id is at " << id_loc << dendl;
467
468 for (map<int,string>::const_iterator p = type_map.begin();
469 p != type_map.end();
470 ++p) {
471 map<string,string>::iterator ip = id_loc.find(p->second);
472 if (ip == id_loc.end())
473 continue;
474 for (std::multimap<string,string>::const_iterator q = loc.find(p->second);
475 q != loc.end();
476 ++q) {
477 if (q->first != p->second)
478 break;
479 if (q->second == ip->second)
480 return p->first;
481 }
482 }
483 return -ERANGE;
484}
485
486int CrushWrapper::parse_loc_map(const std::vector<string>& args,
487 std::map<string,string> *ploc)
488{
489 ploc->clear();
490 for (unsigned i = 0; i < args.size(); ++i) {
491 const char *s = args[i].c_str();
492 const char *pos = strchr(s, '=');
493 if (!pos)
494 return -EINVAL;
495 string key(s, 0, pos-s);
496 string value(pos+1);
497 if (value.length())
498 (*ploc)[key] = value;
499 else
500 return -EINVAL;
501 }
502 return 0;
503}
504
505int CrushWrapper::parse_loc_multimap(const std::vector<string>& args,
506 std::multimap<string,string> *ploc)
507{
508 ploc->clear();
509 for (unsigned i = 0; i < args.size(); ++i) {
510 const char *s = args[i].c_str();
511 const char *pos = strchr(s, '=');
512 if (!pos)
513 return -EINVAL;
514 string key(s, 0, pos-s);
515 string value(pos+1);
516 if (value.length())
517 ploc->insert(make_pair(key, value));
518 else
519 return -EINVAL;
520 }
521 return 0;
522}
523
524bool CrushWrapper::check_item_loc(CephContext *cct, int item, const map<string,string>& loc,
525 int *weight)
526{
527 ldout(cct, 5) << "check_item_loc item " << item << " loc " << loc << dendl;
528
529 for (map<int,string>::const_iterator p = type_map.begin(); p != type_map.end(); ++p) {
530 // ignore device
531 if (p->first == 0)
532 continue;
533
534 // ignore types that aren't specified in loc
535 map<string,string>::const_iterator q = loc.find(p->second);
536 if (q == loc.end()) {
537 ldout(cct, 2) << "warning: did not specify location for '" << p->second << "' level (levels are "
538 << type_map << ")" << dendl;
539 continue;
540 }
541
542 if (!name_exists(q->second)) {
543 ldout(cct, 5) << "check_item_loc bucket " << q->second << " dne" << dendl;
544 return false;
545 }
546
547 int id = get_item_id(q->second);
548 if (id >= 0) {
549 ldout(cct, 5) << "check_item_loc requested " << q->second << " for type " << p->second
550 << " is a device, not bucket" << dendl;
551 return false;
552 }
553
554 assert(bucket_exists(id));
555 crush_bucket *b = get_bucket(id);
556
557 // see if item exists in this bucket
558 for (unsigned j=0; j<b->size; j++) {
559 if (b->items[j] == item) {
560 ldout(cct, 2) << "check_item_loc " << item << " exists in bucket " << b->id << dendl;
561 if (weight)
562 *weight = crush_get_bucket_item_weight(b, j);
563 return true;
564 }
565 }
566 return false;
567 }
568
569 ldout(cct, 1) << "check_item_loc item " << item << " loc " << loc << dendl;
570 return false;
571}
572
573map<string, string> CrushWrapper::get_full_location(int id)
574{
575 vector<pair<string, string> > full_location_ordered;
576 map<string,string> full_location;
577
578 get_full_location_ordered(id, full_location_ordered);
579
580 std::copy(full_location_ordered.begin(),
581 full_location_ordered.end(),
582 std::inserter(full_location, full_location.begin()));
583
584 return full_location;
585}
586
587int CrushWrapper::get_full_location_ordered(int id, vector<pair<string, string> >& path)
588{
589 if (!item_exists(id))
590 return -ENOENT;
591 int cur = id;
592 int ret;
593 while (true) {
594 pair<string, string> parent_coord = get_immediate_parent(cur, &ret);
595 if (ret != 0)
596 break;
597 path.push_back(parent_coord);
598 cur = get_item_id(parent_coord.second);
599 }
600 return 0;
601}
602
603
604map<int, string> CrushWrapper::get_parent_hierarchy(int id)
605{
606 map<int,string> parent_hierarchy;
607 pair<string, string> parent_coord = get_immediate_parent(id);
608 int parent_id;
609
610 // get the integer type for id and create a counter from there
611 int type_counter = get_bucket_type(id);
612
613 // if we get a negative type then we can assume that we have an OSD
614 // change behavior in get_item_type FIXME
615 if (type_counter < 0)
616 type_counter = 0;
617
618 // read the type map and get the name of the type with the largest ID
619 int high_type = 0;
620 for (map<int, string>::iterator it = type_map.begin(); it != type_map.end(); ++it){
621 if ( (*it).first > high_type )
622 high_type = (*it).first;
623 }
624
625 parent_id = get_item_id(parent_coord.second);
626
627 while (type_counter < high_type) {
628 type_counter++;
629 parent_hierarchy[ type_counter ] = parent_coord.first;
630
631 if (type_counter < high_type){
632 // get the coordinate information for the next parent
633 parent_coord = get_immediate_parent(parent_id);
634 parent_id = get_item_id(parent_coord.second);
635 }
636 }
637
638 return parent_hierarchy;
639}
640
641int CrushWrapper::get_children(int id, list<int> *children)
642{
643 // leaf?
644 if (id >= 0) {
645 return 0;
646 }
647
648 crush_bucket *b = get_bucket(id);
649 if (IS_ERR(b)) {
650 return -ENOENT;
651 }
652
653 for (unsigned n=0; n<b->size; n++) {
654 children->push_back(b->items[n]);
655 }
656 return b->size;
657}
658
659
660int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string name,
661 const map<string,string>& loc) // typename -> bucketname
662{
663 if (choose_args.size() > 0) {
664 ldout(cct, 1) << "insert_item not implemented when choose_args is not empty" << dendl;
665 return -EDOM;
666 }
667
668 ldout(cct, 5) << "insert_item item " << item << " weight " << weight
669 << " name " << name << " loc " << loc << dendl;
670
671 if (!is_valid_crush_name(name))
672 return -EINVAL;
673
674 if (!is_valid_crush_loc(cct, loc))
675 return -EINVAL;
676
677 if (name_exists(name)) {
678 if (get_item_id(name) != item) {
679 ldout(cct, 10) << "device name '" << name << "' already exists as id "
680 << get_item_id(name) << dendl;
681 return -EEXIST;
682 }
683 } else {
684 set_item_name(item, name);
685 }
686
687 int cur = item;
688
689 // create locations if locations don't exist and add child in location with 0 weight
690 // the more detail in the insert_item method declaration in CrushWrapper.h
691 for (map<int,string>::iterator p = type_map.begin(); p != type_map.end(); ++p) {
692 // ignore device type
693 if (p->first == 0)
694 continue;
695
696 // skip types that are unspecified
697 map<string,string>::const_iterator q = loc.find(p->second);
698 if (q == loc.end()) {
699 ldout(cct, 2) << "warning: did not specify location for '" << p->second << "' level (levels are "
700 << type_map << ")" << dendl;
701 continue;
702 }
703
704 if (!name_exists(q->second)) {
705 ldout(cct, 5) << "insert_item creating bucket " << q->second << dendl;
706 int empty = 0, newid;
707 int r = add_bucket(0, 0,
708 CRUSH_HASH_DEFAULT, p->first, 1, &cur, &empty, &newid);
709 if (r < 0) {
710 ldout(cct, 1) << "add_bucket failure error: " << cpp_strerror(r) << dendl;
711 return r;
712 }
713 set_item_name(newid, q->second);
714
715 cur = newid;
716 continue;
717 }
718
719 // add to an existing bucket
720 int id = get_item_id(q->second);
721 if (!bucket_exists(id)) {
722 ldout(cct, 1) << "insert_item doesn't have bucket " << id << dendl;
723 return -EINVAL;
724 }
725
726 // check that we aren't creating a cycle.
727 if (subtree_contains(id, cur)) {
728 ldout(cct, 1) << "insert_item item " << cur << " already exists beneath " << id << dendl;
729 return -EINVAL;
730 }
731
732 // we have done sanity check above
733 crush_bucket *b = get_bucket(id);
734
735 if (p->first != b->type) {
736 ldout(cct, 1) << "insert_item existing bucket has type "
737 << "'" << type_map[b->type] << "' != "
738 << "'" << type_map[p->first] << "'" << dendl;
739 return -EINVAL;
740 }
741
742 // are we forming a loop?
743 if (subtree_contains(cur, b->id)) {
744 ldout(cct, 1) << "insert_item " << cur << " already contains " << b->id
745 << "; cannot form loop" << dendl;
746 return -ELOOP;
747 }
748
749 ldout(cct, 5) << "insert_item adding " << cur << " weight " << weight
750 << " to bucket " << id << dendl;
751 int r = crush_bucket_add_item(crush, b, cur, 0);
752 assert (!r);
753 break;
754 }
755
756 // adjust the item's weight in location
757 if(adjust_item_weightf_in_loc(cct, item, weight, loc) > 0) {
758 if (item >= crush->max_devices) {
759 crush->max_devices = item + 1;
760 ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices << dendl;
761 }
762 return 0;
763 }
764
765 ldout(cct, 1) << "error: didn't find anywhere to add item " << item << " in " << loc << dendl;
766 return -EINVAL;
767}
768
769int CrushWrapper::move_bucket(CephContext *cct, int id, const map<string,string>& loc)
770{
771 if (choose_args.size() > 0) {
772 ldout(cct, 1) << "move_bucket not implemented when choose_args is not empty" << dendl;
773 return -EDOM;
774 }
775
776 // sorry this only works for buckets
777 if (id >= 0)
778 return -EINVAL;
779
780 if (!item_exists(id))
781 return -ENOENT;
782
783 // get the name of the bucket we are trying to move for later
784 string id_name = get_item_name(id);
785
786 // detach the bucket
787 int bucket_weight = detach_bucket(cct, id);
788
789 // insert the bucket back into the hierarchy
790 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
791}
792
793int CrushWrapper::link_bucket(CephContext *cct, int id, const map<string,string>& loc)
794{
795 if (choose_args.size() > 0) {
796 ldout(cct, 1) << "link_bucket not implemented when choose_args is not empty" << dendl;
797 return -EDOM;
798 }
799
800 // sorry this only works for buckets
801 if (id >= 0)
802 return -EINVAL;
803
804 if (!item_exists(id))
805 return -ENOENT;
806
807 // get the name of the bucket we are trying to move for later
808 string id_name = get_item_name(id);
809
810 crush_bucket *b = get_bucket(id);
811 unsigned bucket_weight = b->weight;
812
813 return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
814}
815
816int CrushWrapper::create_or_move_item(CephContext *cct, int item, float weight, string name,
817 const map<string,string>& loc) // typename -> bucketname
818{
819 if (choose_args.size() > 0) {
820 ldout(cct, 1) << "create_or_move_item not implemented when choose_args is not empty" << dendl;
821 return -EDOM;
822 }
823
824 int ret = 0;
825 int old_iweight;
826
827 if (!is_valid_crush_name(name))
828 return -EINVAL;
829
830 if (check_item_loc(cct, item, loc, &old_iweight)) {
831 ldout(cct, 5) << "create_or_move_item " << item << " already at " << loc << dendl;
832 } else {
833 if (_search_item_exists(item)) {
834 weight = get_item_weightf(item);
835 ldout(cct, 10) << "create_or_move_item " << item << " exists with weight " << weight << dendl;
836 remove_item(cct, item, true);
837 }
838 ldout(cct, 5) << "create_or_move_item adding " << item << " weight " << weight
839 << " at " << loc << dendl;
840 ret = insert_item(cct, item, weight, name, loc);
841 if (ret == 0)
842 ret = 1; // changed
843 }
844 return ret;
845}
846
847int CrushWrapper::update_item(CephContext *cct, int item, float weight, string name,
848 const map<string,string>& loc) // typename -> bucketname
849{
850 if (choose_args.size() > 0) {
851 ldout(cct, 1) << "update_item not implemented when choose_args is not empty" << dendl;
852 return -EDOM;
853 }
854
855 ldout(cct, 5) << "update_item item " << item << " weight " << weight
856 << " name " << name << " loc " << loc << dendl;
857 int ret = 0;
858
859 if (!is_valid_crush_name(name))
860 return -EINVAL;
861
862 if (!is_valid_crush_loc(cct, loc))
863 return -EINVAL;
864
865 // compare quantized (fixed-point integer) weights!
866 int iweight = (int)(weight * (float)0x10000);
867 int old_iweight;
868 if (check_item_loc(cct, item, loc, &old_iweight)) {
869 ldout(cct, 5) << "update_item " << item << " already at " << loc << dendl;
870 if (old_iweight != iweight) {
871 ldout(cct, 5) << "update_item " << item << " adjusting weight "
872 << ((float)old_iweight/(float)0x10000) << " -> " << weight << dendl;
873 adjust_item_weight_in_loc(cct, item, iweight, loc);
874 ret = 1;
875 }
876 if (get_item_name(item) != name) {
877 ldout(cct, 5) << "update_item setting " << item << " name to " << name << dendl;
878 set_item_name(item, name);
879 ret = 1;
880 }
881 } else {
882 if (item_exists(item)) {
883 remove_item(cct, item, true);
884 }
885 ldout(cct, 5) << "update_item adding " << item << " weight " << weight
886 << " at " << loc << dendl;
887 ret = insert_item(cct, item, weight, name, loc);
888 if (ret == 0)
889 ret = 1; // changed
890 }
891 return ret;
892}
893
894int CrushWrapper::get_item_weight(int id) const
895{
896 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
897 crush_bucket *b = crush->buckets[bidx];
898 if (b == NULL)
899 continue;
900 if (b->id == id)
901 return b->weight;
902 for (unsigned i = 0; i < b->size; i++)
903 if (b->items[i] == id)
904 return crush_get_bucket_item_weight(b, i);
905 }
906 return -ENOENT;
907}
908
909int CrushWrapper::get_item_weight_in_loc(int id, const map<string,string> &loc)
910{
911 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
912
913 int bid = get_item_id(l->second);
914 if (!bucket_exists(bid))
915 continue;
916 crush_bucket *b = get_bucket(bid);
917 for (unsigned int i = 0; i < b->size; i++) {
918 if (b->items[i] == id) {
919 return crush_get_bucket_item_weight(b, i);
920 }
921 }
922 }
923 return -ENOENT;
924}
925
926int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight)
927{
928 ldout(cct, 5) << "adjust_item_weight " << id << " weight " << weight << dendl;
929 int changed = 0;
930 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
931 crush_bucket *b = crush->buckets[bidx];
932 if (b == 0)
933 continue;
934 for (unsigned i = 0; i < b->size; i++) {
935 if (b->items[i] == id) {
936 int diff = crush_bucket_adjust_item_weight(crush, b, id, weight);
937 ldout(cct, 5) << "adjust_item_weight " << id << " diff " << diff << " in bucket " << bidx << dendl;
938 adjust_item_weight(cct, -1 - bidx, b->weight);
939 changed++;
940 }
941 }
942 }
943 if (!changed)
944 return -ENOENT;
945 return changed;
946}
947
948int CrushWrapper::adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc)
949{
950 ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight << " in " << loc << dendl;
951 int changed = 0;
952
953 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
954 int bid = get_item_id(l->second);
955 if (!bucket_exists(bid))
956 continue;
957 crush_bucket *b = get_bucket(bid);
958 for (unsigned int i = 0; i < b->size; i++) {
959 if (b->items[i] == id) {
960 int diff = crush_bucket_adjust_item_weight(crush, b, id, weight);
961 ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " diff " << diff << " in bucket " << bid << dendl;
962 adjust_item_weight(cct, bid, b->weight);
963 changed++;
964 }
965 }
966 }
967 if (!changed)
968 return -ENOENT;
969 return changed;
970}
971
972int CrushWrapper::adjust_subtree_weight(CephContext *cct, int id, int weight)
973{
974 ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl;
975 crush_bucket *b = get_bucket(id);
976 if (IS_ERR(b))
977 return PTR_ERR(b);
978 int changed = 0;
979 list<crush_bucket*> q;
980 q.push_back(b);
981 while (!q.empty()) {
982 b = q.front();
983 q.pop_front();
984 int local_changed = 0;
985 for (unsigned i=0; i<b->size; ++i) {
986 int n = b->items[i];
987 if (n >= 0) {
988 crush_bucket_adjust_item_weight(crush, b, n, weight);
989 ++changed;
990 ++local_changed;
991 } else {
992 crush_bucket *sub = get_bucket(n);
993 if (IS_ERR(sub))
994 continue;
995 q.push_back(sub);
996 }
997 }
998 if (local_changed) {
999 adjust_item_weight(cct, b->id, b->weight);
1000 }
1001 }
1002 return changed;
1003}
1004
1005bool CrushWrapper::check_item_present(int id) const
1006{
1007 bool found = false;
1008
1009 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1010 crush_bucket *b = crush->buckets[bidx];
1011 if (b == 0)
1012 continue;
1013 for (unsigned i = 0; i < b->size; i++)
1014 if (b->items[i] == id)
1015 found = true;
1016 }
1017 return found;
1018}
1019
1020
1021pair<string,string> CrushWrapper::get_immediate_parent(int id, int *_ret)
1022{
1023
1024 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1025 crush_bucket *b = crush->buckets[bidx];
1026 if (b == 0)
1027 continue;
1028 for (unsigned i = 0; i < b->size; i++)
1029 if (b->items[i] == id) {
1030 string parent_id = name_map[b->id];
1031 string parent_bucket_type = type_map[b->type];
1032 if (_ret)
1033 *_ret = 0;
1034 return make_pair(parent_bucket_type, parent_id);
1035 }
1036 }
1037
1038 if (_ret)
1039 *_ret = -ENOENT;
1040
1041 return pair<string, string>();
1042}
1043
1044int CrushWrapper::get_immediate_parent_id(int id, int *parent) const
1045{
1046 for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
1047 crush_bucket *b = crush->buckets[bidx];
1048 if (b == 0)
1049 continue;
1050 for (unsigned i = 0; i < b->size; i++) {
1051 if (b->items[i] == id) {
1052 *parent = b->id;
1053 return 0;
1054 }
1055 }
1056 }
1057 return -ENOENT;
1058}
1059
1060bool CrushWrapper::class_is_in_use(int class_id)
1061{
1062 for (auto &i : class_bucket)
1063 for (auto &j : i.second)
1064 if (j.first == class_id)
1065 return true;
1066
1067 for (auto &i : class_map)
1068 if (i.second == class_id)
1069 return true;
1070
1071 return false;
1072}
1073
1074int CrushWrapper::populate_classes()
1075{
1076 set<int> roots;
1077 find_roots(roots);
1078 for (auto &r : roots) {
1079 if (r >= 0)
1080 continue;
1081 if (id_has_class(r))
1082 continue;
1083 for (auto &c : class_name) {
1084 int clone;
1085 int res = device_class_clone(r, c.first, &clone);
1086 if (res < 0)
1087 return res;
1088 }
1089 }
1090 return 0;
1091}
1092
1093int CrushWrapper::cleanup_classes()
1094{
1095 return trim_roots_with_class(true);
1096}
1097
1098int CrushWrapper::trim_roots_with_class(bool unused)
1099{
1100 set<int> roots;
1101 find_roots(roots);
1102 for (auto &r : roots) {
1103 if (r >= 0)
1104 continue;
1105 if (!id_has_class(r))
1106 continue;
1107 int res = remove_root(r, unused);
1108 if (res)
1109 return res;
1110 }
1111 // there is no need to reweight because we only remove from the
1112 // root and down
1113 return 0;
1114}
1115
1116void CrushWrapper::reweight(CephContext *cct)
1117{
1118 set<int> roots;
1119 find_roots(roots);
1120 for (set<int>::iterator p = roots.begin(); p != roots.end(); ++p) {
1121 if (*p >= 0)
1122 continue;
1123 crush_bucket *b = get_bucket(*p);
1124 ldout(cct, 5) << "reweight bucket " << *p << dendl;
1125 int r = crush_reweight_bucket(crush, b);
1126 assert(r == 0);
1127 }
1128}
1129
1130int CrushWrapper::add_simple_ruleset_at(string name, string root_name,
1131 string failure_domain_name,
1132 string mode, int rule_type,
1133 int rno, ostream *err)
1134{
1135 if (rule_exists(name)) {
1136 if (err)
1137 *err << "rule " << name << " exists";
1138 return -EEXIST;
1139 }
1140 if (rno >= 0) {
1141 if (rule_exists(rno)) {
1142 if (err)
1143 *err << "rule with ruleno " << rno << " exists";
1144 return -EEXIST;
1145 }
1146 if (ruleset_exists(rno)) {
1147 if (err)
1148 *err << "ruleset " << rno << " exists";
1149 return -EEXIST;
1150 }
1151 } else {
1152 for (rno = 0; rno < get_max_rules(); rno++) {
1153 if (!rule_exists(rno) && !ruleset_exists(rno))
1154 break;
1155 }
1156 }
1157 if (!name_exists(root_name)) {
1158 if (err)
1159 *err << "root item " << root_name << " does not exist";
1160 return -ENOENT;
1161 }
1162 int root = get_item_id(root_name);
1163 int type = 0;
1164 if (failure_domain_name.length()) {
1165 type = get_type_id(failure_domain_name);
1166 if (type < 0) {
1167 if (err)
1168 *err << "unknown type " << failure_domain_name;
1169 return -EINVAL;
1170 }
1171 }
1172 if (mode != "firstn" && mode != "indep") {
1173 if (err)
1174 *err << "unknown mode " << mode;
1175 return -EINVAL;
1176 }
1177
1178 int steps = 3;
1179 if (mode == "indep")
1180 steps = 5;
1181 int min_rep = mode == "firstn" ? 1 : 3;
1182 int max_rep = mode == "firstn" ? 10 : 20;
1183 //set the ruleset the same as rule_id(rno)
1184 crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_rep, max_rep);
1185 assert(rule);
1186 int step = 0;
1187 if (mode == "indep") {
1188 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
1189 crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
1190 }
1191 crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
1192 if (type)
1193 crush_rule_set_step(rule, step++,
1194 mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN :
1195 CRUSH_RULE_CHOOSELEAF_INDEP,
1196 CRUSH_CHOOSE_N,
1197 type);
1198 else
1199 crush_rule_set_step(rule, step++,
1200 mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN :
1201 CRUSH_RULE_CHOOSE_INDEP,
1202 CRUSH_CHOOSE_N,
1203 0);
1204 crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
1205
1206 int ret = crush_add_rule(crush, rule, rno);
1207 if(ret < 0) {
1208 *err << "failed to add rule " << rno << " because " << cpp_strerror(ret);
1209 return ret;
1210 }
1211 set_rule_name(rno, name);
1212 have_rmaps = false;
1213 return rno;
1214}
1215
1216int CrushWrapper::add_simple_ruleset(string name, string root_name,
1217 string failure_domain_name,
1218 string mode, int rule_type,
1219 ostream *err)
1220{
1221 return add_simple_ruleset_at(name, root_name, failure_domain_name, mode,
1222 rule_type, -1, err);
1223}
1224
1225int CrushWrapper::get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap)
1226{
1227 if (ruleno >= crush->max_rules)
1228 return -ENOENT;
1229 if (crush->rules[ruleno] == NULL)
1230 return -ENOENT;
1231 crush_rule *rule = crush->rules[ruleno];
1232
1233 // build a weight map for each TAKE in the rule, and then merge them
1234 for (unsigned i=0; i<rule->len; ++i) {
1235 map<int,float> m;
1236 float sum = 0;
1237 if (rule->steps[i].op == CRUSH_RULE_TAKE) {
1238 int n = rule->steps[i].arg1;
1239 if (n >= 0) {
1240 m[n] = 1.0;
1241 sum = 1.0;
1242 } else {
1243 list<int> q;
1244 q.push_back(n);
1245 //breadth first iterate the OSD tree
1246 while (!q.empty()) {
1247 int bno = q.front();
1248 q.pop_front();
1249 crush_bucket *b = crush->buckets[-1-bno];
1250 assert(b);
1251 for (unsigned j=0; j<b->size; ++j) {
1252 int item_id = b->items[j];
1253 if (item_id >= 0) { //it's an OSD
1254 float w = crush_get_bucket_item_weight(b, j);
1255 m[item_id] = w;
1256 sum += w;
1257 } else { //not an OSD, expand the child later
1258 q.push_back(item_id);
1259 }
1260 }
1261 }
1262 }
1263 }
1264 for (map<int,float>::iterator p = m.begin(); p != m.end(); ++p) {
1265 map<int,float>::iterator q = pmap->find(p->first);
1266 if (q == pmap->end()) {
1267 (*pmap)[p->first] = p->second / sum;
1268 } else {
1269 q->second += p->second / sum;
1270 }
1271 }
1272 }
1273
1274 return 0;
1275}
1276
1277int CrushWrapper::remove_rule(int ruleno)
1278{
1279 if (ruleno >= (int)crush->max_rules)
1280 return -ENOENT;
1281 if (crush->rules[ruleno] == NULL)
1282 return -ENOENT;
1283 crush_destroy_rule(crush->rules[ruleno]);
1284 crush->rules[ruleno] = NULL;
1285 rule_name_map.erase(ruleno);
1286 have_rmaps = false;
1287 return 0;
1288}
1289
1290int CrushWrapper::update_device_class(CephContext *cct, int id, const string& class_name, const string& name)
1291{
1292 int class_id = get_class_id(class_name);
1293 if (class_id < 0) {
1294 ldout(cct, 0) << "update_device_class class " << class_name << " does not exist " << dendl;
1295 return -ENOENT;
1296 }
1297 if (id < 0) {
1298 ldout(cct, 0) << "update_device_class " << name << " id " << id << " is negative " << dendl;
1299 return -EINVAL;
1300 }
1301 assert(item_exists(id));
1302
1303 if (class_map.count(id) != 0 && class_map[id] == class_id) {
1304 ldout(cct, 5) << "update_device_class " << name << " already set to class " << class_name << dendl;
1305 return 0;
1306 }
1307
1308 set_item_class(id, class_id);
1309
1310 int r = rebuild_roots_with_classes();
1311 if (r < 0)
1312 return r;
1313 return 1;
1314}
1315
1316int CrushWrapper::device_class_clone(int original_id, int device_class, int *clone)
1317{
1318 const char *item_name = get_item_name(original_id);
1319 if (item_name == NULL)
1320 return -ECHILD;
1321 const char *class_name = get_class_name(device_class);
1322 if (class_name == NULL)
1323 return -EBADF;
1324 string copy_name = item_name + string("~") + class_name;
1325 if (name_exists(copy_name)) {
1326 *clone = get_item_id(copy_name);
1327 return 0;
1328 }
1329 crush_bucket *original = get_bucket(original_id);
1330 if (original == NULL)
1331 return -ENOENT;
1332 crush_bucket *copy = crush_make_bucket(crush,
1333 original->alg,
1334 original->hash,
1335 original->type,
1336 0, NULL, NULL);
1337 if(copy == NULL)
1338 return -ENOMEM;
1339 for (unsigned i = 0; i < original->size; i++) {
1340 int item = original->items[i];
1341 int weight = crush_get_bucket_item_weight(original, i);
1342 if (item >= 0) {
1343 if (class_map.count(item) != 0 && class_map[item] == device_class) {
1344 int res = crush_bucket_add_item(crush, copy, item, weight);
1345 if (res)
1346 return res;
1347 }
1348 } else {
1349 int child_copy_id;
1350 int res = device_class_clone(item, device_class, &child_copy_id);
1351 if (res < 0)
1352 return res;
1353 crush_bucket *child_copy = get_bucket(child_copy_id);
1354 if (IS_ERR(child_copy))
1355 return -ENOENT;
1356 res = crush_bucket_add_item(crush, copy, child_copy_id, child_copy->weight);
1357 if (res)
1358 return res;
1359 }
1360 }
1361 int res = crush_add_bucket(crush, 0, copy, clone);
1362 if (res)
1363 return res;
1364 res = set_item_class(*clone, device_class);
1365 if (res < 0)
1366 return res;
1367 // we do not use set_item_name because the name is intentionally invalid
1368 name_map[*clone] = copy_name;
1369 if (have_rmaps)
1370 name_rmap[copy_name] = *clone;
1371 class_bucket[original_id][device_class] = *clone;
1372 return 0;
1373}
1374
1375int CrushWrapper::rebuild_roots_with_classes()
1376{
1377 int r = trim_roots_with_class(false);
1378 if (r < 0)
1379 return r;
1380 r = populate_classes();
1381 if (r < 0)
1382 return r;
1383 return trim_roots_with_class(true);
1384}
1385
1386void CrushWrapper::encode(bufferlist& bl, uint64_t features) const
1387{
1388 assert(crush);
1389
1390 __u32 magic = CRUSH_MAGIC;
1391 ::encode(magic, bl);
1392
1393 ::encode(crush->max_buckets, bl);
1394 ::encode(crush->max_rules, bl);
1395 ::encode(crush->max_devices, bl);
1396
1397 // buckets
1398 for (int i=0; i<crush->max_buckets; i++) {
1399 __u32 alg = 0;
1400 if (crush->buckets[i]) alg = crush->buckets[i]->alg;
1401 ::encode(alg, bl);
1402 if (!alg)
1403 continue;
1404
1405 ::encode(crush->buckets[i]->id, bl);
1406 ::encode(crush->buckets[i]->type, bl);
1407 ::encode(crush->buckets[i]->alg, bl);
1408 ::encode(crush->buckets[i]->hash, bl);
1409 ::encode(crush->buckets[i]->weight, bl);
1410 ::encode(crush->buckets[i]->size, bl);
1411 for (unsigned j=0; j<crush->buckets[i]->size; j++)
1412 ::encode(crush->buckets[i]->items[j], bl);
1413
1414 switch (crush->buckets[i]->alg) {
1415 case CRUSH_BUCKET_UNIFORM:
1416 ::encode((reinterpret_cast<crush_bucket_uniform*>(crush->buckets[i]))->item_weight, bl);
1417 break;
1418
1419 case CRUSH_BUCKET_LIST:
1420 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
1421 ::encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->item_weights[j], bl);
1422 ::encode((reinterpret_cast<crush_bucket_list*>(crush->buckets[i]))->sum_weights[j], bl);
1423 }
1424 break;
1425
1426 case CRUSH_BUCKET_TREE:
1427 ::encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes, bl);
1428 for (unsigned j=0; j<(reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->num_nodes; j++)
1429 ::encode((reinterpret_cast<crush_bucket_tree*>(crush->buckets[i]))->node_weights[j], bl);
1430 break;
1431
1432 case CRUSH_BUCKET_STRAW:
1433 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
1434 ::encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->item_weights[j], bl);
1435 ::encode((reinterpret_cast<crush_bucket_straw*>(crush->buckets[i]))->straws[j], bl);
1436 }
1437 break;
1438
1439 case CRUSH_BUCKET_STRAW2:
1440 for (unsigned j=0; j<crush->buckets[i]->size; j++) {
1441 ::encode((reinterpret_cast<crush_bucket_straw2*>(crush->buckets[i]))->item_weights[j], bl);
1442 }
1443 break;
1444
1445 default:
1446 ceph_abort();
1447 break;
1448 }
1449 }
1450
1451 // rules
1452 for (unsigned i=0; i<crush->max_rules; i++) {
1453 __u32 yes = crush->rules[i] ? 1:0;
1454 ::encode(yes, bl);
1455 if (!yes)
1456 continue;
1457
1458 ::encode(crush->rules[i]->len, bl);
1459 ::encode(crush->rules[i]->mask, bl);
1460 for (unsigned j=0; j<crush->rules[i]->len; j++)
1461 ::encode(crush->rules[i]->steps[j], bl);
1462 }
1463
1464 // name info
1465 ::encode(type_map, bl);
1466 ::encode(name_map, bl);
1467 ::encode(rule_name_map, bl);
1468
1469 // tunables
1470 ::encode(crush->choose_local_tries, bl);
1471 ::encode(crush->choose_local_fallback_tries, bl);
1472 ::encode(crush->choose_total_tries, bl);
1473 ::encode(crush->chooseleaf_descend_once, bl);
1474 ::encode(crush->chooseleaf_vary_r, bl);
1475 ::encode(crush->straw_calc_version, bl);
1476 ::encode(crush->allowed_bucket_algs, bl);
1477 if (features & CEPH_FEATURE_CRUSH_TUNABLES5) {
1478 ::encode(crush->chooseleaf_stable, bl);
1479 }
1480
1481 if (HAVE_FEATURE(features, SERVER_LUMINOUS)) {
1482 // device classes
1483 ::encode(class_map, bl);
1484 ::encode(class_name, bl);
1485 ::encode(class_bucket, bl);
1486
1487 ::encode(choose_args.size(), bl);
1488 for (auto c : choose_args) {
1489 ::encode(c.first, bl);
1490 crush_choose_arg_map arg_map = c.second;
1491 __u32 size = 0;
1492 for (__u32 i = 0; i < arg_map.size; i++) {
1493 crush_choose_arg *arg = &arg_map.args[i];
1494 if (arg->weight_set_size == 0 &&
1495 arg->ids_size == 0)
1496 continue;
1497 size++;
1498 }
1499 ::encode(size, bl);
1500 for (__u32 i = 0; i < arg_map.size; i++) {
1501 crush_choose_arg *arg = &arg_map.args[i];
1502 if (arg->weight_set_size == 0 &&
1503 arg->ids_size == 0)
1504 continue;
1505 ::encode(i, bl);
1506 ::encode(arg->weight_set_size, bl);
1507 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1508 crush_weight_set *weight_set = &arg->weight_set[j];
1509 ::encode(weight_set->size, bl);
1510 for (__u32 k = 0; k < weight_set->size; k++)
1511 ::encode(weight_set->weights[k], bl);
1512 }
1513 ::encode(arg->ids_size, bl);
1514 for (__u32 j = 0; j < arg->ids_size; j++)
1515 ::encode(arg->ids[j], bl);
1516 }
1517 }
1518 }
1519}
1520
1521static void decode_32_or_64_string_map(map<int32_t,string>& m, bufferlist::iterator& blp)
1522{
1523 m.clear();
1524 __u32 n;
1525 ::decode(n, blp);
1526 while (n--) {
1527 __s32 key;
1528 ::decode(key, blp);
1529
1530 __u32 strlen;
1531 ::decode(strlen, blp);
1532 if (strlen == 0) {
1533 // der, key was actually 64-bits!
1534 ::decode(strlen, blp);
1535 }
1536 ::decode_nohead(strlen, m[key], blp);
1537 }
1538}
1539
1540void CrushWrapper::decode(bufferlist::iterator& blp)
1541{
1542 create();
1543
1544 __u32 magic;
1545 ::decode(magic, blp);
1546 if (magic != CRUSH_MAGIC)
1547 throw buffer::malformed_input("bad magic number");
1548
1549 ::decode(crush->max_buckets, blp);
1550 ::decode(crush->max_rules, blp);
1551 ::decode(crush->max_devices, blp);
1552
1553 // legacy tunables, unless we decode something newer
1554 set_tunables_legacy();
1555
1556 try {
1557 // buckets
1558 crush->buckets = (crush_bucket**)calloc(1, crush->max_buckets * sizeof(crush_bucket*));
1559 for (int i=0; i<crush->max_buckets; i++) {
1560 decode_crush_bucket(&crush->buckets[i], blp);
1561 }
1562
1563 // rules
1564 crush->rules = (crush_rule**)calloc(1, crush->max_rules * sizeof(crush_rule*));
1565 for (unsigned i = 0; i < crush->max_rules; ++i) {
1566 __u32 yes;
1567 ::decode(yes, blp);
1568 if (!yes) {
1569 crush->rules[i] = NULL;
1570 continue;
1571 }
1572
1573 __u32 len;
1574 ::decode(len, blp);
1575 crush->rules[i] = reinterpret_cast<crush_rule*>(calloc(1, crush_rule_size(len)));
1576 crush->rules[i]->len = len;
1577 ::decode(crush->rules[i]->mask, blp);
1578 for (unsigned j=0; j<crush->rules[i]->len; j++)
1579 ::decode(crush->rules[i]->steps[j], blp);
1580 }
1581
1582 // name info
1583 // NOTE: we had a bug where we were incoding int instead of int32, which means the
1584 // 'key' field for these maps may be either 32 or 64 bits, depending. tolerate
1585 // both by assuming the string is always non-empty.
1586 decode_32_or_64_string_map(type_map, blp);
1587 decode_32_or_64_string_map(name_map, blp);
1588 decode_32_or_64_string_map(rule_name_map, blp);
1589
1590 // tunables
1591 if (!blp.end()) {
1592 ::decode(crush->choose_local_tries, blp);
1593 ::decode(crush->choose_local_fallback_tries, blp);
1594 ::decode(crush->choose_total_tries, blp);
1595 }
1596 if (!blp.end()) {
1597 ::decode(crush->chooseleaf_descend_once, blp);
1598 }
1599 if (!blp.end()) {
1600 ::decode(crush->chooseleaf_vary_r, blp);
1601 }
1602 if (!blp.end()) {
1603 ::decode(crush->straw_calc_version, blp);
1604 }
1605 if (!blp.end()) {
1606 ::decode(crush->allowed_bucket_algs, blp);
1607 }
1608 if (!blp.end()) {
1609 ::decode(crush->chooseleaf_stable, blp);
1610 }
1611 if (!blp.end()) {
1612 ::decode(class_map, blp);
1613 ::decode(class_name, blp);
1614 for (auto &c : class_name)
1615 class_rname[c.second] = c.first;
1616 ::decode(class_bucket, blp);
1617 cleanup_classes();
1618 }
1619 if (!blp.end()) {
1620 size_t choose_args_size;
1621 ::decode(choose_args_size, blp);
1622 for (size_t i = 0; i < choose_args_size; i++) {
1623 uint64_t choose_args_index;
1624 ::decode(choose_args_index, blp);
1625 crush_choose_arg_map arg_map;
1626 arg_map.size = crush->max_buckets;
1627 arg_map.args = (crush_choose_arg*)calloc(arg_map.size, sizeof(crush_choose_arg));
1628 __u32 size;
1629 ::decode(size, blp);
1630 for (__u32 j = 0; j < size; j++) {
1631 __u32 bucket_index;
1632 ::decode(bucket_index, blp);
1633 assert(bucket_index < arg_map.size);
1634 crush_choose_arg *arg = &arg_map.args[bucket_index];
1635 ::decode(arg->weight_set_size, blp);
1636 arg->weight_set = (crush_weight_set*)calloc(arg->weight_set_size, sizeof(crush_weight_set));
1637 for (__u32 k = 0; k < arg->weight_set_size; k++) {
1638 crush_weight_set *weight_set = &arg->weight_set[k];
1639 ::decode(weight_set->size, blp);
1640 weight_set->weights = (__u32*)calloc(weight_set->size, sizeof(__u32));
1641 for (__u32 l = 0; l < weight_set->size; l++)
1642 ::decode(weight_set->weights[l], blp);
1643 }
1644 ::decode(arg->ids_size, blp);
1645 arg->ids = (int*)calloc(arg->ids_size, sizeof(int));
1646 for (__u32 k = 0; k < arg->ids_size; k++)
1647 ::decode(arg->ids[k], blp);
1648 }
1649 choose_args[choose_args_index] = arg_map;
1650 }
1651 }
1652 finalize();
1653 }
1654 catch (...) {
1655 crush_destroy(crush);
1656 throw;
1657 }
1658}
1659
1660void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp)
1661{
1662 __u32 alg;
1663 ::decode(alg, blp);
1664 if (!alg) {
1665 *bptr = NULL;
1666 return;
1667 }
1668
1669 int size = 0;
1670 switch (alg) {
1671 case CRUSH_BUCKET_UNIFORM:
1672 size = sizeof(crush_bucket_uniform);
1673 break;
1674 case CRUSH_BUCKET_LIST:
1675 size = sizeof(crush_bucket_list);
1676 break;
1677 case CRUSH_BUCKET_TREE:
1678 size = sizeof(crush_bucket_tree);
1679 break;
1680 case CRUSH_BUCKET_STRAW:
1681 size = sizeof(crush_bucket_straw);
1682 break;
1683 case CRUSH_BUCKET_STRAW2:
1684 size = sizeof(crush_bucket_straw2);
1685 break;
1686 default:
1687 {
1688 char str[128];
1689 snprintf(str, sizeof(str), "unsupported bucket algorithm: %d", alg);
1690 throw buffer::malformed_input(str);
1691 }
1692 }
1693 crush_bucket *bucket = reinterpret_cast<crush_bucket*>(calloc(1, size));
1694 *bptr = bucket;
1695
1696 ::decode(bucket->id, blp);
1697 ::decode(bucket->type, blp);
1698 ::decode(bucket->alg, blp);
1699 ::decode(bucket->hash, blp);
1700 ::decode(bucket->weight, blp);
1701 ::decode(bucket->size, blp);
1702
1703 bucket->items = (__s32*)calloc(1, bucket->size * sizeof(__s32));
1704 for (unsigned j = 0; j < bucket->size; ++j) {
1705 ::decode(bucket->items[j], blp);
1706 }
1707
1708 switch (bucket->alg) {
1709 case CRUSH_BUCKET_UNIFORM:
1710 ::decode((reinterpret_cast<crush_bucket_uniform*>(bucket))->item_weight, blp);
1711 break;
1712
1713 case CRUSH_BUCKET_LIST: {
1714 crush_bucket_list* cbl = reinterpret_cast<crush_bucket_list*>(bucket);
1715 cbl->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
1716 cbl->sum_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
1717
1718 for (unsigned j = 0; j < bucket->size; ++j) {
1719 ::decode(cbl->item_weights[j], blp);
1720 ::decode(cbl->sum_weights[j], blp);
1721 }
1722 break;
1723 }
1724
1725 case CRUSH_BUCKET_TREE: {
1726 crush_bucket_tree* cbt = reinterpret_cast<crush_bucket_tree*>(bucket);
1727 ::decode(cbt->num_nodes, blp);
1728 cbt->node_weights = (__u32*)calloc(1, cbt->num_nodes * sizeof(__u32));
1729 for (unsigned j=0; j<cbt->num_nodes; j++) {
1730 ::decode(cbt->node_weights[j], blp);
1731 }
1732 break;
1733 }
1734
1735 case CRUSH_BUCKET_STRAW: {
1736 crush_bucket_straw* cbs = reinterpret_cast<crush_bucket_straw*>(bucket);
1737 cbs->straws = (__u32*)calloc(1, bucket->size * sizeof(__u32));
1738 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
1739 for (unsigned j = 0; j < bucket->size; ++j) {
1740 ::decode(cbs->item_weights[j], blp);
1741 ::decode(cbs->straws[j], blp);
1742 }
1743 break;
1744 }
1745
1746 case CRUSH_BUCKET_STRAW2: {
1747 crush_bucket_straw2* cbs = reinterpret_cast<crush_bucket_straw2*>(bucket);
1748 cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
1749 for (unsigned j = 0; j < bucket->size; ++j) {
1750 ::decode(cbs->item_weights[j], blp);
1751 }
1752 break;
1753 }
1754
1755 default:
1756 // We should have handled this case in the first switch statement
1757 ceph_abort();
1758 break;
1759 }
1760}
1761
1762
1763void CrushWrapper::dump(Formatter *f) const
1764{
1765 f->open_array_section("devices");
1766 for (int i=0; i<get_max_devices(); i++) {
1767 f->open_object_section("device");
1768 f->dump_int("id", i);
1769 const char *n = get_item_name(i);
1770 if (n) {
1771 f->dump_string("name", n);
1772 } else {
1773 char name[20];
1774 sprintf(name, "device%d", i);
1775 f->dump_string("name", name);
1776 }
1777 const char *device_class = get_item_class(i);
1778 if (device_class != NULL)
1779 f->dump_string("class", device_class);
1780 f->close_section();
1781 }
1782 f->close_section();
1783
1784 f->open_array_section("types");
1785 int n = get_num_type_names();
1786 for (int i=0; n; i++) {
1787 const char *name = get_type_name(i);
1788 if (!name) {
1789 if (i == 0) {
1790 f->open_object_section("type");
1791 f->dump_int("type_id", 0);
1792 f->dump_string("name", "device");
1793 f->close_section();
1794 }
1795 continue;
1796 }
1797 n--;
1798 f->open_object_section("type");
1799 f->dump_int("type_id", i);
1800 f->dump_string("name", name);
1801 f->close_section();
1802 }
1803 f->close_section();
1804
1805 f->open_array_section("buckets");
1806 for (int bucket = -1; bucket > -1-get_max_buckets(); --bucket) {
1807 if (!bucket_exists(bucket))
1808 continue;
1809 f->open_object_section("bucket");
1810 f->dump_int("id", bucket);
1811 if (get_item_name(bucket))
1812 f->dump_string("name", get_item_name(bucket));
1813 f->dump_int("type_id", get_bucket_type(bucket));
1814 if (get_type_name(get_bucket_type(bucket)))
1815 f->dump_string("type_name", get_type_name(get_bucket_type(bucket)));
1816 f->dump_int("weight", get_bucket_weight(bucket));
1817 f->dump_string("alg", crush_bucket_alg_name(get_bucket_alg(bucket)));
1818 f->dump_string("hash", crush_hash_name(get_bucket_hash(bucket)));
1819 f->open_array_section("items");
1820 for (int j=0; j<get_bucket_size(bucket); j++) {
1821 f->open_object_section("item");
1822 f->dump_int("id", get_bucket_item(bucket, j));
1823 f->dump_int("weight", get_bucket_item_weight(bucket, j));
1824 f->dump_int("pos", j);
1825 f->close_section();
1826 }
1827 f->close_section();
1828 f->close_section();
1829 }
1830 f->close_section();
1831
1832 f->open_array_section("rules");
1833 dump_rules(f);
1834 f->close_section();
1835
1836 f->open_object_section("tunables");
1837 dump_tunables(f);
1838 f->close_section();
1839
1840 dump_choose_args(f);
1841}
1842
1843namespace {
1844 // depth first walker
1845 class TreeDumper {
1846 typedef CrushTreeDumper::Item Item;
1847 const CrushWrapper *crush;
1848 public:
1849 explicit TreeDumper(const CrushWrapper *crush)
1850 : crush(crush) {}
1851
1852 void dump(Formatter *f) {
1853 set<int> roots;
1854 crush->find_roots(roots);
1855 for (set<int>::iterator root = roots.begin(); root != roots.end(); ++root) {
1856 dump_item(Item(*root, 0, crush->get_bucket_weightf(*root)), f);
1857 }
1858 }
1859
1860 private:
1861 void dump_item(const Item& qi, Formatter* f) {
1862 if (qi.is_bucket()) {
1863 f->open_object_section("bucket");
1864 CrushTreeDumper::dump_item_fields(crush, qi, f);
1865 dump_bucket_children(qi, f);
1866 f->close_section();
1867 } else {
1868 f->open_object_section("device");
1869 CrushTreeDumper::dump_item_fields(crush, qi, f);
1870 f->close_section();
1871 }
1872 }
1873
1874 void dump_bucket_children(const Item& parent, Formatter* f) {
1875 f->open_array_section("items");
1876 const int max_pos = crush->get_bucket_size(parent.id);
1877 for (int pos = 0; pos < max_pos; pos++) {
1878 int id = crush->get_bucket_item(parent.id, pos);
1879 float weight = crush->get_bucket_item_weightf(parent.id, pos);
1880 dump_item(Item(id, parent.depth + 1, weight), f);
1881 }
1882 f->close_section();
1883 }
1884 };
1885}
1886
1887void CrushWrapper::dump_tree(Formatter *f) const
1888{
1889 assert(f);
1890 TreeDumper(this).dump(f);
1891}
1892
1893void CrushWrapper::dump_tunables(Formatter *f) const
1894{
1895 f->dump_int("choose_local_tries", get_choose_local_tries());
1896 f->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries());
1897 f->dump_int("choose_total_tries", get_choose_total_tries());
1898 f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once());
1899 f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r());
1900 f->dump_int("chooseleaf_stable", get_chooseleaf_stable());
1901 f->dump_int("straw_calc_version", get_straw_calc_version());
1902 f->dump_int("allowed_bucket_algs", get_allowed_bucket_algs());
1903
1904 // be helpful about it
1905 if (has_jewel_tunables())
1906 f->dump_string("profile", "jewel");
1907 else if (has_hammer_tunables())
1908 f->dump_string("profile", "hammer");
1909 else if (has_firefly_tunables())
1910 f->dump_string("profile", "firefly");
1911 else if (has_bobtail_tunables())
1912 f->dump_string("profile", "bobtail");
1913 else if (has_argonaut_tunables())
1914 f->dump_string("profile", "argonaut");
1915 else
1916 f->dump_string("profile", "unknown");
1917 f->dump_int("optimal_tunables", (int)has_optimal_tunables());
1918 f->dump_int("legacy_tunables", (int)has_legacy_tunables());
1919
1920 // be helpful about minimum version required
1921 f->dump_string("minimum_required_version", get_min_required_version());
1922
1923 f->dump_int("require_feature_tunables", (int)has_nondefault_tunables());
1924 f->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2());
1925 f->dump_int("has_v2_rules", (int)has_v2_rules());
1926 f->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3());
1927 f->dump_int("has_v3_rules", (int)has_v3_rules());
1928 f->dump_int("has_v4_buckets", (int)has_v4_buckets());
1929 f->dump_int("require_feature_tunables5", (int)has_nondefault_tunables5());
1930 f->dump_int("has_v5_rules", (int)has_v5_rules());
1931}
1932
1933void CrushWrapper::dump_choose_args(Formatter *f) const
1934{
1935 f->open_object_section("choose_args");
1936 for (auto c : choose_args) {
1937 crush_choose_arg_map arg_map = c.second;
1938 f->open_array_section(stringify(c.first).c_str());
1939 for (__u32 i = 0; i < arg_map.size; i++) {
1940 crush_choose_arg *arg = &arg_map.args[i];
1941 if (arg->weight_set_size == 0 &&
1942 arg->ids_size == 0)
1943 continue;
1944 f->open_object_section("choose_args");
1945 int bucket_index = i;
1946 f->dump_int("bucket_id", -1-bucket_index);
1947 if (arg->weight_set_size > 0) {
1948 f->open_array_section("weight_set");
1949 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1950 f->open_array_section("weights");
1951 __u32 *weights = arg->weight_set[j].weights;
1952 __u32 size = arg->weight_set[j].size;
1953 for (__u32 k = 0; k < size; k++) {
1954 f->dump_float("weight", (float)weights[k]/(float)0x10000);
1955 }
1956 f->close_section();
1957 }
1958 f->close_section();
1959 }
1960 if (arg->ids_size > 0) {
1961 f->open_array_section("ids");
1962 for (__u32 j = 0; j < arg->ids_size; j++)
1963 f->dump_int("id", arg->ids[j]);
1964 f->close_section();
1965 }
1966 f->close_section();
1967 }
1968 f->close_section();
1969 }
1970 f->close_section();
1971}
1972
1973void CrushWrapper::dump_rules(Formatter *f) const
1974{
1975 for (int i=0; i<get_max_rules(); i++) {
1976 if (!rule_exists(i))
1977 continue;
1978 dump_rule(i, f);
1979 }
1980}
1981
1982void CrushWrapper::dump_rule(int ruleset, Formatter *f) const
1983{
1984 f->open_object_section("rule");
1985 f->dump_int("rule_id", ruleset);
1986 if (get_rule_name(ruleset))
1987 f->dump_string("rule_name", get_rule_name(ruleset));
1988 f->dump_int("ruleset", get_rule_mask_ruleset(ruleset));
1989 f->dump_int("type", get_rule_mask_type(ruleset));
1990 f->dump_int("min_size", get_rule_mask_min_size(ruleset));
1991 f->dump_int("max_size", get_rule_mask_max_size(ruleset));
1992 f->open_array_section("steps");
1993 for (int j=0; j<get_rule_len(ruleset); j++) {
1994 f->open_object_section("step");
1995 switch (get_rule_op(ruleset, j)) {
1996 case CRUSH_RULE_NOOP:
1997 f->dump_string("op", "noop");
1998 break;
1999 case CRUSH_RULE_TAKE:
2000 f->dump_string("op", "take");
2001 {
2002 int item = get_rule_arg1(ruleset, j);
2003 f->dump_int("item", item);
2004
2005 const char *name = get_item_name(item);
2006 f->dump_string("item_name", name ? name : "");
2007 }
2008 break;
2009 case CRUSH_RULE_EMIT:
2010 f->dump_string("op", "emit");
2011 break;
2012 case CRUSH_RULE_CHOOSE_FIRSTN:
2013 f->dump_string("op", "choose_firstn");
2014 f->dump_int("num", get_rule_arg1(ruleset, j));
2015 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2016 break;
2017 case CRUSH_RULE_CHOOSE_INDEP:
2018 f->dump_string("op", "choose_indep");
2019 f->dump_int("num", get_rule_arg1(ruleset, j));
2020 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2021 break;
2022 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
2023 f->dump_string("op", "chooseleaf_firstn");
2024 f->dump_int("num", get_rule_arg1(ruleset, j));
2025 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2026 break;
2027 case CRUSH_RULE_CHOOSELEAF_INDEP:
2028 f->dump_string("op", "chooseleaf_indep");
2029 f->dump_int("num", get_rule_arg1(ruleset, j));
2030 f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j)));
2031 break;
2032 case CRUSH_RULE_SET_CHOOSE_TRIES:
2033 f->dump_string("op", "set_choose_tries");
2034 f->dump_int("num", get_rule_arg1(ruleset, j));
2035 break;
2036 case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
2037 f->dump_string("op", "set_chooseleaf_tries");
2038 f->dump_int("num", get_rule_arg1(ruleset, j));
2039 break;
2040 default:
2041 f->dump_int("opcode", get_rule_op(ruleset, j));
2042 f->dump_int("arg1", get_rule_arg1(ruleset, j));
2043 f->dump_int("arg2", get_rule_arg2(ruleset, j));
2044 }
2045 f->close_section();
2046 }
2047 f->close_section();
2048 f->close_section();
2049}
2050
2051void CrushWrapper::list_rules(Formatter *f) const
2052{
2053 for (int rule = 0; rule < get_max_rules(); rule++) {
2054 if (!rule_exists(rule))
2055 continue;
2056 f->dump_string("name", get_rule_name(rule));
2057 }
2058}
2059
2060class CrushTreePlainDumper : public CrushTreeDumper::Dumper<ostream> {
2061public:
2062 typedef CrushTreeDumper::Dumper<ostream> Parent;
2063
2064 explicit CrushTreePlainDumper(const CrushWrapper *crush)
2065 : Parent(crush) {}
2066
2067 void dump(ostream *out) {
2068 *out << "ID\tWEIGHT\tTYPE NAME\n";
2069 Parent::dump(out);
2070 }
2071
2072protected:
2073 void dump_item(const CrushTreeDumper::Item &qi, ostream *out) override {
2074 *out << qi.id << "\t"
2075 << weightf_t(qi.weight) << "\t";
2076
2077 for (int k=0; k < qi.depth; k++)
2078 *out << "\t";
2079
2080 if (qi.is_bucket())
2081 {
2082 *out << crush->get_type_name(crush->get_bucket_type(qi.id)) << " "
2083 << crush->get_item_name(qi.id);
2084 }
2085 else
2086 {
2087 *out << "osd." << qi.id;
2088 }
2089 *out << "\n";
2090 }
2091};
2092
2093
2094class CrushTreeFormattingDumper : public CrushTreeDumper::FormattingDumper {
2095public:
2096 typedef CrushTreeDumper::FormattingDumper Parent;
2097
2098 explicit CrushTreeFormattingDumper(const CrushWrapper *crush)
2099 : Parent(crush) {}
2100
2101 void dump(Formatter *f) {
2102 f->open_array_section("nodes");
2103 Parent::dump(f);
2104 f->close_section();
2105 f->open_array_section("stray");
2106 f->close_section();
2107 }
2108};
2109
2110
2111void CrushWrapper::dump_tree(ostream *out, Formatter *f) const
2112{
2113 if (out)
2114 CrushTreePlainDumper(this).dump(out);
2115 if (f)
2116 CrushTreeFormattingDumper(this).dump(f);
2117}
2118
2119void CrushWrapper::generate_test_instances(list<CrushWrapper*>& o)
2120{
2121 o.push_back(new CrushWrapper);
2122 // fixme
2123}
2124
2125int CrushWrapper::_get_osd_pool_default_crush_replicated_ruleset(CephContext *cct,
2126 bool quiet)
2127{
2128 int crush_ruleset = cct->_conf->osd_pool_default_crush_rule;
2129 if (crush_ruleset == -1) {
2130 crush_ruleset = cct->_conf->osd_pool_default_crush_replicated_ruleset;
2131 } else if (!quiet) {
2132 ldout(cct, 0) << "osd_pool_default_crush_rule is deprecated "
2133 << "use osd_pool_default_crush_replicated_ruleset instead"
2134 << dendl;
2135 ldout(cct, 0) << "osd_pool_default_crush_rule = "
2136 << cct->_conf-> osd_pool_default_crush_rule << " overrides "
2137 << "osd_pool_default_crush_replicated_ruleset = "
2138 << cct->_conf->osd_pool_default_crush_replicated_ruleset
2139 << dendl;
2140 }
2141
2142 return crush_ruleset;
2143}
2144
2145/**
2146 * Determine the default CRUSH ruleset ID to be used with
2147 * newly created replicated pools.
2148 *
2149 * @returns a ruleset ID (>=0) or -1 if no suitable ruleset found
2150 */
2151int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext *cct)
2152{
2153 int crush_ruleset = _get_osd_pool_default_crush_replicated_ruleset(cct,
2154 false);
2155 if (crush_ruleset == CEPH_DEFAULT_CRUSH_REPLICATED_RULESET) {
2156 crush_ruleset = find_first_ruleset(pg_pool_t::TYPE_REPLICATED);
2157 } else if (!ruleset_exists(crush_ruleset)) {
2158 crush_ruleset = -1; // match find_first_ruleset() retval
2159 }
2160
2161 return crush_ruleset;
2162}
2163
2164bool CrushWrapper::is_valid_crush_name(const string& s)
2165{
2166 if (s.empty())
2167 return false;
2168 for (string::const_iterator p = s.begin(); p != s.end(); ++p) {
2169 if (!(*p == '-') &&
2170 !(*p == '_') &&
2171 !(*p == '.') &&
2172 !(*p >= '0' && *p <= '9') &&
2173 !(*p >= 'A' && *p <= 'Z') &&
2174 !(*p >= 'a' && *p <= 'z'))
2175 return false;
2176 }
2177 return true;
2178}
2179
2180bool CrushWrapper::is_valid_crush_loc(CephContext *cct,
2181 const map<string,string>& loc)
2182{
2183 for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
2184 if (!is_valid_crush_name(l->first) ||
2185 !is_valid_crush_name(l->second)) {
2186 ldout(cct, 1) << "loc["
2187 << l->first << "] = '"
2188 << l->second << "' not a valid crush name ([A-Za-z0-9_-.]+)"
2189 << dendl;
2190 return false;
2191 }
2192 }
2193 return true;
2194}
2195
2196int CrushWrapper::_choose_type_stack(
2197 CephContext *cct,
2198 const vector<pair<int,int>>& stack,
2199 const set<int>& overfull,
2200 const vector<int>& underfull,
2201 const vector<int>& orig,
2202 vector<int>::const_iterator& i,
2203 set<int>& used,
2204 vector<int> *pw) const
2205{
2206 vector<int> w = *pw;
2207 vector<int> o;
2208
2209 ldout(cct, 10) << __func__ << " stack " << stack
2210 << " orig " << orig
2211 << " at " << *i
2212 << " pw " << *pw
2213 << dendl;
2214
2215 vector<int> cumulative_fanout(stack.size());
2216 int f = 1;
2217 for (int j = (int)stack.size() - 1; j >= 0; --j) {
2218 cumulative_fanout[j] = f;
2219 f *= stack[j].second;
2220 }
2221 ldout(cct, 10) << __func__ << " cumulative_fanout " << cumulative_fanout
2222 << dendl;
2223
2224 for (unsigned j = 0; j < stack.size(); ++j) {
2225 int type = stack[j].first;
2226 int fanout = stack[j].second;
2227 int cum_fanout = cumulative_fanout[j];
2228 ldout(cct, 10) << " level " << j << ": type " << type << " fanout " << fanout
2229 << " cumulative " << cum_fanout
2230 << " w " << w << dendl;
2231 vector<int> o;
2232 auto tmpi = i;
2233 for (auto from : w) {
2234 ldout(cct, 10) << " from " << from << dendl;
2235
2236 for (int pos = 0; pos < fanout; ++pos) {
2237 if (type > 0) {
2238 // non-leaf
2239 int item = *tmpi;
2240 do {
2241 int r = get_immediate_parent_id(item, &item);
2242 if (r < 0) {
2243 ldout(cct, 10) << __func__ << " parent of " << item << " got "
2244 << cpp_strerror(r) << dendl;
2245 return -EINVAL;
2246 }
2247 } while (get_bucket_type(item) != type);
2248 o.push_back(item);
2249 ldout(cct, 10) << __func__ << " from " << *tmpi << " got " << item
2250 << " of type " << type << dendl;
2251 int n = cum_fanout;
2252 while (n-- && tmpi != orig.end())
2253 ++tmpi;
2254 } else {
2255 // leaf
2256 bool replaced = false;
2257 if (overfull.count(*i)) {
2258 for (auto item : underfull) {
2259 ldout(cct, 10) << __func__ << " pos " << pos
2260 << " was " << *i << " considering " << item
2261 << dendl;
2262 if (used.count(item)) {
2263 ldout(cct, 20) << __func__ << " in used " << used << dendl;
2264 continue;
2265 }
2266 if (!subtree_contains(from, item)) {
2267 ldout(cct, 20) << __func__ << " not in subtree " << from << dendl;
2268 continue;
2269 }
2270 if (std::find(orig.begin(), orig.end(), item) != orig.end()) {
2271 ldout(cct, 20) << __func__ << " in orig " << orig << dendl;
2272 continue;
2273 }
2274 o.push_back(item);
2275 used.insert(item);
2276 ldout(cct, 10) << __func__ << " pos " << pos << " replace "
2277 << *i << " -> " << item << dendl;
2278 replaced = true;
2279 ++i;
2280 break;
2281 }
2282 }
2283 if (!replaced) {
2284 ldout(cct, 10) << __func__ << " pos " << pos << " keep " << *i
2285 << dendl;
2286 o.push_back(*i);
2287 ++i;
2288 }
2289 if (i == orig.end()) {
2290 ldout(cct, 10) << __func__ << " end of orig, break 1" << dendl;
2291 break;
2292 }
2293 }
2294 }
2295 if (i == orig.end()) {
2296 ldout(cct, 10) << __func__ << " end of orig, break 2" << dendl;
2297 break;
2298 }
2299 }
2300 ldout(cct, 10) << __func__ << " w <- " << o << " was " << w << dendl;
2301 w.swap(o);
2302 }
2303 *pw = w;
2304 return 0;
2305}
2306
2307int CrushWrapper::try_remap_rule(
2308 CephContext *cct,
2309 int ruleno,
2310 int maxout,
2311 const set<int>& overfull,
2312 const vector<int>& underfull,
2313 const vector<int>& orig,
2314 vector<int> *out) const
2315{
2316 const crush_map *map = crush;
2317 const crush_rule *rule = get_rule(ruleno);
2318 assert(rule);
2319
2320 ldout(cct, 10) << __func__ << " ruleno " << ruleno
2321 << " numrep " << maxout << " overfull " << overfull
2322 << " underfull " << underfull << " orig " << orig
2323 << dendl;
2324 vector<int> w; // working set
2325 out->clear();
2326
2327 auto i = orig.begin();
2328 set<int> used;
2329
2330 vector<pair<int,int>> type_stack; // (type, fan-out)
2331
2332 for (unsigned step = 0; step < rule->len; ++step) {
2333 const crush_rule_step *curstep = &rule->steps[step];
2334 ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl;
2335 switch (curstep->op) {
2336 case CRUSH_RULE_TAKE:
2337 if ((curstep->arg1 >= 0 && curstep->arg1 < map->max_devices) ||
2338 (-1-curstep->arg1 >= 0 && -1-curstep->arg1 < map->max_buckets &&
2339 map->buckets[-1-curstep->arg1])) {
2340 w.clear();
2341 w.push_back(curstep->arg1);
2342 ldout(cct, 10) << __func__ << " take " << w << dendl;
2343 } else {
2344 ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl;
2345 }
2346 break;
2347
2348 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
2349 case CRUSH_RULE_CHOOSELEAF_INDEP:
2350 {
2351 int numrep = curstep->arg1;
2352 int type = curstep->arg2;
2353 if (numrep <= 0)
2354 numrep += maxout;
2355 type_stack.push_back(make_pair(type, numrep));
2356 type_stack.push_back(make_pair(0, 1));
2357 int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
2358 i, used, &w);
2359 if (r < 0)
2360 return r;
2361 type_stack.clear();
2362 }
2363 break;
2364
2365 case CRUSH_RULE_CHOOSE_FIRSTN:
2366 case CRUSH_RULE_CHOOSE_INDEP:
2367 {
2368 int numrep = curstep->arg1;
2369 int type = curstep->arg2;
2370 if (numrep <= 0)
2371 numrep += maxout;
2372 type_stack.push_back(make_pair(type, numrep));
2373 }
2374 break;
2375
2376 case CRUSH_RULE_EMIT:
2377 ldout(cct, 10) << " emit " << w << dendl;
2378 if (!type_stack.empty()) {
2379 int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig,
2380 i, used, &w);
2381 if (r < 0)
2382 return r;
2383 type_stack.clear();
2384 }
2385 for (auto item : w) {
2386 out->push_back(item);
2387 }
2388 w.clear();
2389 break;
2390
2391 default:
2392 // ignore
2393 break;
2394 }
2395 }
2396
2397 return 0;
2398}