]> git.proxmox.com Git - ceph.git/blob - ceph/src/crush/CrushWrapper.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / crush / CrushWrapper.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef CEPH_CRUSH_WRAPPER_H
5 #define CEPH_CRUSH_WRAPPER_H
6
7 #include <stdlib.h>
8 #include <map>
9 #include <set>
10 #include <string>
11
12 #include <iosfwd>
13
14 #include "include/types.h"
15
16 extern "C" {
17 #include "crush.h"
18 #include "hash.h"
19 #include "mapper.h"
20 #include "builder.h"
21 }
22
23 #include "include/err.h"
24 #include "include/encoding.h"
25
26
27 #include "common/Mutex.h"
28
29 #include "include/assert.h"
30 #define BUG_ON(x) assert(!(x))
31
32 namespace ceph {
33 class Formatter;
34 }
35
36 WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's
37
38 inline static void encode(const crush_rule_step &s, bufferlist &bl)
39 {
40 ::encode(s.op, bl);
41 ::encode(s.arg1, bl);
42 ::encode(s.arg2, bl);
43 }
44 inline static void decode(crush_rule_step &s, bufferlist::iterator &p)
45 {
46 ::decode(s.op, p);
47 ::decode(s.arg1, p);
48 ::decode(s.arg2, p);
49 }
50
51 using namespace std;
52 class CrushWrapper {
53 public:
54 std::map<int32_t, string> type_map; /* bucket/device type names */
55 std::map<int32_t, string> name_map; /* bucket/device names */
56 std::map<int32_t, string> rule_name_map;
57 std::map<int32_t, int32_t> class_map; /* item id -> class id */
58 std::map<int32_t, string> class_name; /* class id -> class name */
59 std::map<string, int32_t> class_rname; /* class name -> class id */
60 std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */
61 std::map<uint64_t, crush_choose_arg_map> choose_args;
62
63 private:
64 struct crush_map *crush;
65 /* reverse maps */
66 mutable bool have_rmaps;
67 mutable std::map<string, int> type_rmap, name_rmap, rule_name_rmap;
68 void build_rmaps() const {
69 if (have_rmaps) return;
70 build_rmap(type_map, type_rmap);
71 build_rmap(name_map, name_rmap);
72 build_rmap(rule_name_map, rule_name_rmap);
73 have_rmaps = true;
74 }
75 void build_rmap(const map<int, string> &f, std::map<string, int> &r) const {
76 r.clear();
77 for (std::map<int, string>::const_iterator p = f.begin(); p != f.end(); ++p)
78 r[p->second] = p->first;
79 }
80
81 public:
82 CrushWrapper(const CrushWrapper& other);
83 const CrushWrapper& operator=(const CrushWrapper& other);
84
85 CrushWrapper() : crush(0), have_rmaps(false) {
86 create();
87 }
88 ~CrushWrapper() {
89 if (crush)
90 crush_destroy(crush);
91 choose_args_clear();
92 }
93
94 crush_map *get_crush_map() { return crush; }
95
96 /* building */
97 void create() {
98 if (crush)
99 crush_destroy(crush);
100 crush = crush_create();
101 choose_args_clear();
102 assert(crush);
103 have_rmaps = false;
104
105 set_tunables_default();
106 }
107
108 // tunables
109 void set_tunables_argonaut() {
110 crush->choose_local_tries = 2;
111 crush->choose_local_fallback_tries = 5;
112 crush->choose_total_tries = 19;
113 crush->chooseleaf_descend_once = 0;
114 crush->chooseleaf_vary_r = 0;
115 crush->chooseleaf_stable = 0;
116 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
117 }
118 void set_tunables_bobtail() {
119 crush->choose_local_tries = 0;
120 crush->choose_local_fallback_tries = 0;
121 crush->choose_total_tries = 50;
122 crush->chooseleaf_descend_once = 1;
123 crush->chooseleaf_vary_r = 0;
124 crush->chooseleaf_stable = 0;
125 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
126 }
127 void set_tunables_firefly() {
128 crush->choose_local_tries = 0;
129 crush->choose_local_fallback_tries = 0;
130 crush->choose_total_tries = 50;
131 crush->chooseleaf_descend_once = 1;
132 crush->chooseleaf_vary_r = 1;
133 crush->chooseleaf_stable = 0;
134 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
135 }
136 void set_tunables_hammer() {
137 crush->choose_local_tries = 0;
138 crush->choose_local_fallback_tries = 0;
139 crush->choose_total_tries = 50;
140 crush->chooseleaf_descend_once = 1;
141 crush->chooseleaf_vary_r = 1;
142 crush->chooseleaf_stable = 0;
143 crush->allowed_bucket_algs =
144 (1 << CRUSH_BUCKET_UNIFORM) |
145 (1 << CRUSH_BUCKET_LIST) |
146 (1 << CRUSH_BUCKET_STRAW) |
147 (1 << CRUSH_BUCKET_STRAW2);
148 }
149 void set_tunables_jewel() {
150 crush->choose_local_tries = 0;
151 crush->choose_local_fallback_tries = 0;
152 crush->choose_total_tries = 50;
153 crush->chooseleaf_descend_once = 1;
154 crush->chooseleaf_vary_r = 1;
155 crush->chooseleaf_stable = 1;
156 crush->allowed_bucket_algs =
157 (1 << CRUSH_BUCKET_UNIFORM) |
158 (1 << CRUSH_BUCKET_LIST) |
159 (1 << CRUSH_BUCKET_STRAW) |
160 (1 << CRUSH_BUCKET_STRAW2);
161 }
162
163 void set_tunables_legacy() {
164 set_tunables_argonaut();
165 crush->straw_calc_version = 0;
166 }
167 void set_tunables_optimal() {
168 set_tunables_jewel();
169 crush->straw_calc_version = 1;
170 }
171 void set_tunables_default() {
172 set_tunables_hammer();
173 crush->straw_calc_version = 1;
174 }
175
176 int get_choose_local_tries() const {
177 return crush->choose_local_tries;
178 }
179 void set_choose_local_tries(int n) {
180 crush->choose_local_tries = n;
181 }
182
183 int get_choose_local_fallback_tries() const {
184 return crush->choose_local_fallback_tries;
185 }
186 void set_choose_local_fallback_tries(int n) {
187 crush->choose_local_fallback_tries = n;
188 }
189
190 int get_choose_total_tries() const {
191 return crush->choose_total_tries;
192 }
193 void set_choose_total_tries(int n) {
194 crush->choose_total_tries = n;
195 }
196
197 int get_chooseleaf_descend_once() const {
198 return crush->chooseleaf_descend_once;
199 }
200 void set_chooseleaf_descend_once(int n) {
201 crush->chooseleaf_descend_once = !!n;
202 }
203
204 int get_chooseleaf_vary_r() const {
205 return crush->chooseleaf_vary_r;
206 }
207 void set_chooseleaf_vary_r(int n) {
208 crush->chooseleaf_vary_r = n;
209 }
210
211 int get_chooseleaf_stable() const {
212 return crush->chooseleaf_stable;
213 }
214 void set_chooseleaf_stable(int n) {
215 crush->chooseleaf_stable = n;
216 }
217
218 int get_straw_calc_version() const {
219 return crush->straw_calc_version;
220 }
221 void set_straw_calc_version(int n) {
222 crush->straw_calc_version = n;
223 }
224
225 unsigned get_allowed_bucket_algs() const {
226 return crush->allowed_bucket_algs;
227 }
228 void set_allowed_bucket_algs(unsigned n) {
229 crush->allowed_bucket_algs = n;
230 }
231
232 bool has_argonaut_tunables() const {
233 return
234 crush->choose_local_tries == 2 &&
235 crush->choose_local_fallback_tries == 5 &&
236 crush->choose_total_tries == 19 &&
237 crush->chooseleaf_descend_once == 0 &&
238 crush->chooseleaf_vary_r == 0 &&
239 crush->chooseleaf_stable == 0 &&
240 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
241 }
242 bool has_bobtail_tunables() const {
243 return
244 crush->choose_local_tries == 0 &&
245 crush->choose_local_fallback_tries == 0 &&
246 crush->choose_total_tries == 50 &&
247 crush->chooseleaf_descend_once == 1 &&
248 crush->chooseleaf_vary_r == 0 &&
249 crush->chooseleaf_stable == 0 &&
250 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
251 }
252 bool has_firefly_tunables() const {
253 return
254 crush->choose_local_tries == 0 &&
255 crush->choose_local_fallback_tries == 0 &&
256 crush->choose_total_tries == 50 &&
257 crush->chooseleaf_descend_once == 1 &&
258 crush->chooseleaf_vary_r == 1 &&
259 crush->chooseleaf_stable == 0 &&
260 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
261 }
262 bool has_hammer_tunables() const {
263 return
264 crush->choose_local_tries == 0 &&
265 crush->choose_local_fallback_tries == 0 &&
266 crush->choose_total_tries == 50 &&
267 crush->chooseleaf_descend_once == 1 &&
268 crush->chooseleaf_vary_r == 1 &&
269 crush->chooseleaf_stable == 0 &&
270 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
271 (1 << CRUSH_BUCKET_LIST) |
272 (1 << CRUSH_BUCKET_STRAW) |
273 (1 << CRUSH_BUCKET_STRAW2));
274 }
275 bool has_jewel_tunables() const {
276 return
277 crush->choose_local_tries == 0 &&
278 crush->choose_local_fallback_tries == 0 &&
279 crush->choose_total_tries == 50 &&
280 crush->chooseleaf_descend_once == 1 &&
281 crush->chooseleaf_vary_r == 1 &&
282 crush->chooseleaf_stable == 1 &&
283 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
284 (1 << CRUSH_BUCKET_LIST) |
285 (1 << CRUSH_BUCKET_STRAW) |
286 (1 << CRUSH_BUCKET_STRAW2));
287 }
288
289 bool has_optimal_tunables() const {
290 return has_jewel_tunables();
291 }
292 bool has_legacy_tunables() const {
293 return has_argonaut_tunables();
294 }
295
296 bool has_nondefault_tunables() const {
297 return
298 (crush->choose_local_tries != 2 ||
299 crush->choose_local_fallback_tries != 5 ||
300 crush->choose_total_tries != 19);
301 }
302 bool has_nondefault_tunables2() const {
303 return
304 crush->chooseleaf_descend_once != 0;
305 }
306 bool has_nondefault_tunables3() const {
307 return
308 crush->chooseleaf_vary_r != 0;
309 }
310 bool has_nondefault_tunables5() const {
311 return
312 crush->chooseleaf_stable != 0;
313 }
314
315 bool has_v2_rules() const;
316 bool has_v3_rules() const;
317 bool has_v4_buckets() const;
318 bool has_v5_rules() const;
319 bool has_chooseargs() const; // any chooseargs
320 bool has_incompat_chooseargs() const; // chooseargs that can't be made compat
321
322 bool is_v2_rule(unsigned ruleid) const;
323 bool is_v3_rule(unsigned ruleid) const;
324 bool is_v5_rule(unsigned ruleid) const;
325
326 string get_min_required_version() const {
327 if (has_v5_rules() || has_nondefault_tunables5())
328 return "jewel";
329 else if (has_v4_buckets())
330 return "hammer";
331 else if (has_nondefault_tunables3())
332 return "firefly";
333 else if (has_nondefault_tunables2() || has_nondefault_tunables())
334 return "bobtail";
335 else
336 return "argonaut";
337 }
338
339 // default bucket types
340 unsigned get_default_bucket_alg() const {
341 // in order of preference
342 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2))
343 return CRUSH_BUCKET_STRAW2;
344 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW))
345 return CRUSH_BUCKET_STRAW;
346 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE))
347 return CRUSH_BUCKET_TREE;
348 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST))
349 return CRUSH_BUCKET_LIST;
350 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM))
351 return CRUSH_BUCKET_UNIFORM;
352 return 0;
353 }
354
355 // bucket types
356 int get_num_type_names() const {
357 return type_map.size();
358 }
359 int get_type_id(const string& name) const {
360 build_rmaps();
361 if (type_rmap.count(name))
362 return type_rmap[name];
363 return -1;
364 }
365 const char *get_type_name(int t) const {
366 std::map<int,string>::const_iterator p = type_map.find(t);
367 if (p != type_map.end())
368 return p->second.c_str();
369 return 0;
370 }
371 void set_type_name(int i, const string& name) {
372 type_map[i] = name;
373 if (have_rmaps)
374 type_rmap[name] = i;
375 }
376
377 // item/bucket names
378 bool name_exists(const string& name) const {
379 build_rmaps();
380 return name_rmap.count(name);
381 }
382 bool item_exists(int i) const {
383 return name_map.count(i);
384 }
385 int get_item_id(const string& name) const {
386 build_rmaps();
387 if (name_rmap.count(name))
388 return name_rmap[name];
389 return 0; /* hrm */
390 }
391 const char *get_item_name(int t) const {
392 std::map<int,string>::const_iterator p = name_map.find(t);
393 if (p != name_map.end())
394 return p->second.c_str();
395 return 0;
396 }
397 int set_item_name(int i, const string& name) {
398 if (!is_valid_crush_name(name))
399 return -EINVAL;
400 name_map[i] = name;
401 if (have_rmaps)
402 name_rmap[name] = i;
403 return 0;
404 }
405 bool id_has_class(int i) {
406 int idout;
407 int classout;
408 if (split_id_class(i, &idout, &classout) != 0)
409 return false;
410 return classout != -1;
411 }
412 int split_id_class(int i, int *idout, int *classout) const;
413
414 bool class_exists(const string& name) const {
415 return class_rname.count(name);
416 }
417 const char *get_class_name(int i) const {
418 std::map<int,string>::const_iterator p = class_name.find(i);
419 if (p != class_name.end())
420 return p->second.c_str();
421 return 0;
422 }
423 int get_class_id(const string& name) const {
424 std::map<string,int>::const_iterator p = class_rname.find(name);
425 if (p != class_rname.end())
426 return p->second;
427 else
428 return -EINVAL;
429 }
430 int remove_class_name(const string& name) {
431 std::map<string,int>::const_iterator p = class_rname.find(name);
432 if (p == class_rname.end())
433 return -ENOENT;
434 int class_id = p->second;
435 std::map<int,string>::const_iterator q = class_name.find(class_id);
436 if (q == class_name.end())
437 return -ENOENT;
438 class_rname.erase(name);
439 class_name.erase(class_id);
440 return 0;
441 }
442 int get_or_create_class_id(const string& name) {
443 int c = get_class_id(name);
444 if (c < 0) {
445 int i = class_name.size();
446 class_name[i] = name;
447 class_rname[name] = i;
448 return i;
449 } else {
450 return c;
451 }
452 }
453
454 const char *get_item_class(int t) const {
455 std::map<int,int>::const_iterator p = class_map.find(t);
456 if (p == class_map.end())
457 return 0;
458 return get_class_name(p->second);
459 }
460 int set_item_class(int i, const string& name) {
461 if (!is_valid_crush_name(name))
462 return -EINVAL;
463 class_map[i] = get_or_create_class_id(name);
464 return 0;
465 }
466 int set_item_class(int i, int c) {
467 class_map[i] = c;
468 return c;
469 }
470
471 int can_rename_item(const string& srcname,
472 const string& dstname,
473 ostream *ss) const;
474 int rename_item(const string& srcname,
475 const string& dstname,
476 ostream *ss);
477 int can_rename_bucket(const string& srcname,
478 const string& dstname,
479 ostream *ss) const;
480 int rename_bucket(const string& srcname,
481 const string& dstname,
482 ostream *ss);
483
484 // rule names
485 bool rule_exists(string name) const {
486 build_rmaps();
487 return rule_name_rmap.count(name);
488 }
489 int get_rule_id(string name) const {
490 build_rmaps();
491 if (rule_name_rmap.count(name))
492 return rule_name_rmap[name];
493 return -ENOENT;
494 }
495 const char *get_rule_name(int t) const {
496 std::map<int,string>::const_iterator p = rule_name_map.find(t);
497 if (p != rule_name_map.end())
498 return p->second.c_str();
499 return 0;
500 }
501 void set_rule_name(int i, const string& name) {
502 rule_name_map[i] = name;
503 if (have_rmaps)
504 rule_name_rmap[name] = i;
505 }
506
507
508 /**
509 * find tree nodes referenced by rules by a 'take' command
510 *
511 * Note that these may not be parentless roots.
512 */
513 void find_takes(set<int>& roots) const;
514
515 /**
516 * find tree roots
517 *
518 * These are parentless nodes in the map.
519 */
520 void find_roots(set<int>& roots) const;
521
522 /**
523 * see if an item is contained within a subtree
524 *
525 * @param root haystack
526 * @param item needle
527 * @return true if the item is located beneath the given node
528 */
529 bool subtree_contains(int root, int item) const;
530
531 private:
532 /**
533 * search for an item in any bucket
534 *
535 * @param i item
536 * @return true if present
537 */
538 bool _search_item_exists(int i) const;
539 public:
540
541 /**
542 * see if item is located where we think it is
543 *
544 * This verifies that the given item is located at a particular
545 * location in the hierarchy. However, that check is imprecise; we
546 * are actually verifying that the most specific location key/value
547 * is correct. For example, if loc specifies that rack=foo and
548 * host=bar, it will verify that host=bar is correct; any placement
549 * above that level in the hierarchy is ignored. This matches the
550 * semantics for insert_item().
551 *
552 * @param cct cct
553 * @param item item id
554 * @param loc location to check (map of type to bucket names)
555 * @param weight optional pointer to weight of item at that location
556 * @return true if item is at specified location
557 */
558 bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, int *iweight);
559 bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, float *weight) {
560 int iweight;
561 bool ret = check_item_loc(cct, item, loc, &iweight);
562 if (weight)
563 *weight = (float)iweight / (float)0x10000;
564 return ret;
565 }
566
567
568 /**
569 * returns the (type, name) of the parent bucket of id
570 *
571 * FIXME: ambiguous for items that occur multiple times in the map
572 */
573 pair<string,string> get_immediate_parent(int id, int *ret = NULL);
574 int get_immediate_parent_id(int id, int *parent) const;
575
576 /**
577 * get the fully qualified location of a device by successively finding
578 * parents beginning at ID and ending at highest type number specified in
579 * the CRUSH map which assumes that if device foo is under device bar, the
580 * type_id of foo < bar where type_id is the integer specified in the CRUSH map
581 *
582 * returns the location in the form of (type=foo) where type is a type of bucket
583 * specified in the CRUSH map and foo is a name specified in the CRUSH map
584 */
585 map<string, string> get_full_location(int id);
586
587 /*
588 * identical to get_full_location(int id) although it returns the type/name
589 * pairs in the order they occur in the hierarchy.
590 *
591 * returns -ENOENT if id is not found.
592 */
593 int get_full_location_ordered(int id, vector<pair<string, string> >& path);
594
595 /**
596 * returns (type_id, type) of all parent buckets between id and
597 * default, can be used to check for anomolous CRUSH maps
598 */
599 map<int, string> get_parent_hierarchy(int id);
600
601 /**
602 * enumerate immediate children of given node
603 *
604 * @param id parent bucket or device id
605 * @return number of items, or error
606 */
607 int get_children(int id, list<int> *children);
608
609 /**
610 * insert an item into the map at a specific position
611 *
612 * Add an item at a specific location in the hierarchy.
613 * Specifically, we look for the most specific location constraint
614 * for which a bucket already exists, and then create intervening
615 * buckets beneath that in order to place the item.
616 *
617 * Note that any location specifiers *above* the most specific match
618 * are ignored. For example, if we specify that osd.12 goes in
619 * host=foo, rack=bar, and row=baz, and rack=bar is the most
620 * specific match, we will create host=foo beneath that point and
621 * put osd.12 inside it. However, we will not verify that rack=bar
622 * is beneath row=baz or move it.
623 *
624 * In short, we will build out a hierarchy, and move leaves around,
625 * but not adjust the hierarchy's internal structure. Yet.
626 *
627 * If the item is already present in the map, we will return EEXIST.
628 * If the location key/value pairs are nonsensical
629 * (rack=nameofdevice), or the location specifiers do not attach us
630 * to any existing part of the hierarchy, we will return EINVAL.
631 *
632 * @param cct cct
633 * @param id item id
634 * @param weight item weight
635 * @param name item name
636 * @param loc location (map of type to bucket names)
637 * @return 0 for success, negative on error
638 */
639 int insert_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc);
640
641 /**
642 * move a bucket in the hierarchy to the given location
643 *
644 * This has the same location and ancestor creation behavior as
645 * insert_item(), but will relocate the specified existing bucket.
646 *
647 * @param cct cct
648 * @param id bucket id
649 * @param loc location (map of type to bucket names)
650 * @return 0 for success, negative on error
651 */
652 int move_bucket(CephContext *cct, int id, const map<string,string>& loc);
653
654 /**
655 * add a link to an existing bucket in the hierarchy to the new location
656 *
657 * This has the same location and ancestor creation behavior as
658 * insert_item(), but will add a new link to the specified existing
659 * bucket.
660 *
661 * @param cct cct
662 * @param id bucket id
663 * @param loc location (map of type to bucket names)
664 * @return 0 for success, negative on error
665 */
666 int link_bucket(CephContext *cct, int id, const map<string,string>& loc);
667
668 /**
669 * add or update an item's position in the map
670 *
671 * This is analogous to insert_item, except we will move an item if
672 * it is already present.
673 *
674 * @param cct cct
675 * @param id item id
676 * @param weight item weight
677 * @param name item name
678 * @param loc location (map of type to bucket names)
679 * @return 0 for no change, 1 for successful change, negative on error
680 */
681 int update_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc);
682
683 /**
684 * create or move an item, but do not adjust its weight if it already exists
685 *
686 * @param cct cct
687 * @param item item id
688 * @param weight initial item weight (if we need to create it)
689 * @param name item name
690 * @param loc location (map of type to bucket names)
691 * @return 0 for no change, 1 for successful change, negative on error
692 */
693 int create_or_move_item(CephContext *cct, int item, float weight, string name,
694 const map<string,string>& loc);
695
696 /**
697 * remove all instances of an item from the map
698 *
699 * @param cct cct
700 * @param id item id to remove
701 * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty)
702 * @return 0 on success, negative on error
703 */
704 int remove_item(CephContext *cct, int id, bool unlink_only);
705
706 /**
707 * recursively remove buckets starting at item and stop removing
708 * when a bucket is in use.
709 *
710 * @param item id to remove
711 * @param unused true if only unused items should be removed
712 * @return 0 on success, negative on error
713 */
714 int remove_root(int item, bool unused);
715
716 /**
717 * remove all instances of an item nested beneath a certain point from the map
718 *
719 * @param cct cct
720 * @param id item id to remove
721 * @param ancestor ancestor item id under which to search for id
722 * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty)
723 * @return 0 on success, negative on error
724 */
725 private:
726 bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only);
727 int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
728 bool _bucket_is_in_use(int id);
729 public:
730 int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
731
732 /**
733 * calculate the locality/distance from a given id to a crush location map
734 *
735 * Specifically, we look for the lowest-valued type for which the
736 * location of id matches that described in loc.
737 *
738 * @param cct cct
739 * @param id the existing id in the map
740 * @param loc a set of key=value pairs describing a location in the hierarchy
741 */
742 int get_common_ancestor_distance(CephContext *cct, int id,
743 const std::multimap<string,string>& loc);
744
745 /**
746 * parse a set of key/value pairs out of a string vector
747 *
748 * These are used to describe a location in the CRUSH hierarchy.
749 *
750 * @param args list of strings (each key= or key=value)
751 * @param ploc pointer to a resulting location map or multimap
752 */
753 static int parse_loc_map(const std::vector<string>& args,
754 std::map<string,string> *ploc);
755 static int parse_loc_multimap(const std::vector<string>& args,
756 std::multimap<string,string> *ploc);
757
758 /**
759 * get an item's weight
760 *
761 * Will return the weight for the first instance it finds.
762 *
763 * @param id item id to check
764 * @return weight of item
765 */
766 int get_item_weight(int id) const;
767 float get_item_weightf(int id) const {
768 return (float)get_item_weight(id) / (float)0x10000;
769 }
770 int get_item_weight_in_loc(int id, const map<string,string> &loc);
771 float get_item_weightf_in_loc(int id, const map<string,string> &loc) {
772 return (float)get_item_weight_in_loc(id, loc) / (float)0x10000;
773 }
774
775 int adjust_item_weight(CephContext *cct, int id, int weight);
776 int adjust_item_weightf(CephContext *cct, int id, float weight) {
777 return adjust_item_weight(cct, id, (int)(weight * (float)0x10000));
778 }
779 int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc);
780 int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map<string,string>& loc) {
781 return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc);
782 }
783 void reweight(CephContext *cct);
784
785 int adjust_subtree_weight(CephContext *cct, int id, int weight);
786 int adjust_subtree_weightf(CephContext *cct, int id, float weight) {
787 return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000));
788 }
789
790 /// check if item id is present in the map hierarchy
791 bool check_item_present(int id) const;
792
793
794 /*** devices ***/
795 int get_max_devices() const {
796 if (!crush) return 0;
797 return crush->max_devices;
798 }
799
800
801 /*** rules ***/
802 private:
803 crush_rule *get_rule(unsigned ruleno) const {
804 if (!crush) return (crush_rule *)(-ENOENT);
805 if (ruleno >= crush->max_rules)
806 return 0;
807 return crush->rules[ruleno];
808 }
809 crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const {
810 crush_rule *n = get_rule(ruleno);
811 if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL);
812 if (step >= n->len) return (crush_rule_step *)(-EINVAL);
813 return &n->steps[step];
814 }
815
816 public:
817 /* accessors */
818 int get_max_rules() const {
819 if (!crush) return 0;
820 return crush->max_rules;
821 }
822 bool rule_exists(unsigned ruleno) const {
823 if (!crush) return false;
824 if (ruleno < crush->max_rules &&
825 crush->rules[ruleno] != NULL)
826 return true;
827 return false;
828 }
829 int get_rule_len(unsigned ruleno) const {
830 crush_rule *r = get_rule(ruleno);
831 if (IS_ERR(r)) return PTR_ERR(r);
832 return r->len;
833 }
834 int get_rule_mask_ruleset(unsigned ruleno) const {
835 crush_rule *r = get_rule(ruleno);
836 if (IS_ERR(r)) return -1;
837 return r->mask.ruleset;
838 }
839 int get_rule_mask_type(unsigned ruleno) const {
840 crush_rule *r = get_rule(ruleno);
841 if (IS_ERR(r)) return -1;
842 return r->mask.type;
843 }
844 int get_rule_mask_min_size(unsigned ruleno) const {
845 crush_rule *r = get_rule(ruleno);
846 if (IS_ERR(r)) return -1;
847 return r->mask.min_size;
848 }
849 int get_rule_mask_max_size(unsigned ruleno) const {
850 crush_rule *r = get_rule(ruleno);
851 if (IS_ERR(r)) return -1;
852 return r->mask.max_size;
853 }
854 int get_rule_op(unsigned ruleno, unsigned step) const {
855 crush_rule_step *s = get_rule_step(ruleno, step);
856 if (IS_ERR(s)) return PTR_ERR(s);
857 return s->op;
858 }
859 int get_rule_arg1(unsigned ruleno, unsigned step) const {
860 crush_rule_step *s = get_rule_step(ruleno, step);
861 if (IS_ERR(s)) return PTR_ERR(s);
862 return s->arg1;
863 }
864 int get_rule_arg2(unsigned ruleno, unsigned step) const {
865 crush_rule_step *s = get_rule_step(ruleno, step);
866 if (IS_ERR(s)) return PTR_ERR(s);
867 return s->arg2;
868 }
869
870 /**
871 * calculate a map of osds to weights for a given rule
872 *
873 * Generate a map of which OSDs get how much relative weight for a
874 * given rule.
875 *
876 * @param ruleno [in] rule id
877 * @param pmap [out] map of osd to weight
878 * @return 0 for success, or negative error code
879 */
880 int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap);
881
882 /* modifiers */
883 int add_rule(int len, int ruleset, int type, int minsize, int maxsize, int ruleno) {
884 if (!crush) return -ENOENT;
885 crush_rule *n = crush_make_rule(len, ruleset, type, minsize, maxsize);
886 assert(n);
887 ruleno = crush_add_rule(crush, n, ruleno);
888 return ruleno;
889 }
890 int set_rule_mask_max_size(unsigned ruleno, int max_size) {
891 crush_rule *r = get_rule(ruleno);
892 if (IS_ERR(r)) return -1;
893 return r->mask.max_size = max_size;
894 }
895 int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) {
896 if (!crush) return -ENOENT;
897 crush_rule *n = get_rule(ruleno);
898 if (!n) return -1;
899 crush_rule_set_step(n, step, op, arg1, arg2);
900 return 0;
901 }
902 int set_rule_step_take(unsigned ruleno, unsigned step, int val) {
903 return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0);
904 }
905 int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) {
906 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0);
907 }
908 int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) {
909 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0);
910 }
911 int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) {
912 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0);
913 }
914 int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) {
915 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0);
916 }
917 int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) {
918 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0);
919 }
920 int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) {
921 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0);
922 }
923 int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) {
924 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type);
925 }
926 int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) {
927 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type);
928 }
929 int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) {
930 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type);
931 }
932 int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) {
933 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type);
934 }
935 int set_rule_step_emit(unsigned ruleno, unsigned step) {
936 return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
937 }
938
939 int add_simple_ruleset(string name, string root_name, string failure_domain_type,
940 string mode, int rule_type, ostream *err = 0);
941 /**
942 * @param rno ruleset id to use, -1 to pick the lowest available
943 */
944 int add_simple_ruleset_at(string name, string root_name,
945 string failure_domain_type, string mode,
946 int rule_type, int rno, ostream *err = 0);
947
948 int remove_rule(int ruleno);
949
950
951 /** buckets **/
952 private:
953 const crush_bucket *get_bucket(int id) const {
954 if (!crush)
955 return (crush_bucket *)(-EINVAL);
956 unsigned int pos = (unsigned int)(-1 - id);
957 unsigned int max_buckets = crush->max_buckets;
958 if (pos >= max_buckets)
959 return (crush_bucket *)(-ENOENT);
960 crush_bucket *ret = crush->buckets[pos];
961 if (ret == NULL)
962 return (crush_bucket *)(-ENOENT);
963 return ret;
964 }
965 crush_bucket *get_bucket(int id) {
966 if (!crush)
967 return (crush_bucket *)(-EINVAL);
968 unsigned int pos = (unsigned int)(-1 - id);
969 unsigned int max_buckets = crush->max_buckets;
970 if (pos >= max_buckets)
971 return (crush_bucket *)(-ENOENT);
972 crush_bucket *ret = crush->buckets[pos];
973 if (ret == NULL)
974 return (crush_bucket *)(-ENOENT);
975 return ret;
976 }
977 /**
978 * detach a bucket from its parent and adjust the parent weight
979 *
980 * returns the weight of the detached bucket
981 **/
982 int detach_bucket(CephContext *cct, int item){
983 if (!crush)
984 return (-EINVAL);
985
986 if (item >= 0)
987 return (-EINVAL);
988
989 // check that the bucket that we want to detach exists
990 assert(bucket_exists(item));
991
992 // get the bucket's weight
993 crush_bucket *b = get_bucket(item);
994 unsigned bucket_weight = b->weight;
995
996 // get where the bucket is located
997 pair<string, string> bucket_location = get_immediate_parent(item);
998
999 // get the id of the parent bucket
1000 int parent_id = get_item_id(bucket_location.second);
1001
1002 // get the parent bucket
1003 crush_bucket *parent_bucket = get_bucket(parent_id);
1004
1005 if (!IS_ERR(parent_bucket)) {
1006 // zero out the bucket weight
1007 crush_bucket_adjust_item_weight(crush, parent_bucket, item, 0);
1008 adjust_item_weight(cct, parent_bucket->id, parent_bucket->weight);
1009
1010 // remove the bucket from the parent
1011 crush_bucket_remove_item(crush, parent_bucket, item);
1012 } else if (PTR_ERR(parent_bucket) != -ENOENT) {
1013 return PTR_ERR(parent_bucket);
1014 }
1015
1016 // check that we're happy
1017 int test_weight = 0;
1018 map<string,string> test_location;
1019 test_location[ bucket_location.first ] = (bucket_location.second);
1020
1021 bool successful_detach = !(check_item_loc(cct, item, test_location, &test_weight));
1022 assert(successful_detach);
1023 assert(test_weight == 0);
1024
1025 return bucket_weight;
1026 }
1027
1028 public:
1029 int get_max_buckets() const {
1030 if (!crush) return -EINVAL;
1031 return crush->max_buckets;
1032 }
1033 int get_next_bucket_id() const {
1034 if (!crush) return -EINVAL;
1035 return crush_get_next_bucket_id(crush);
1036 }
1037 bool bucket_exists(int id) const {
1038 const crush_bucket *b = get_bucket(id);
1039 if (IS_ERR(b))
1040 return false;
1041 return true;
1042 }
1043 int get_bucket_weight(int id) const {
1044 const crush_bucket *b = get_bucket(id);
1045 if (IS_ERR(b)) return PTR_ERR(b);
1046 return b->weight;
1047 }
1048 float get_bucket_weightf(int id) const {
1049 const crush_bucket *b = get_bucket(id);
1050 if (IS_ERR(b)) return 0;
1051 return b->weight / (float)0x10000;
1052 }
1053 int get_bucket_type(int id) const {
1054 const crush_bucket *b = get_bucket(id);
1055 if (IS_ERR(b)) return PTR_ERR(b);
1056 return b->type;
1057 }
1058 int get_bucket_alg(int id) const {
1059 const crush_bucket *b = get_bucket(id);
1060 if (IS_ERR(b)) return PTR_ERR(b);
1061 return b->alg;
1062 }
1063 int get_bucket_hash(int id) const {
1064 const crush_bucket *b = get_bucket(id);
1065 if (IS_ERR(b)) return PTR_ERR(b);
1066 return b->hash;
1067 }
1068 int get_bucket_size(int id) const {
1069 const crush_bucket *b = get_bucket(id);
1070 if (IS_ERR(b)) return PTR_ERR(b);
1071 return b->size;
1072 }
1073 int get_bucket_item(int id, int pos) const {
1074 const crush_bucket *b = get_bucket(id);
1075 if (IS_ERR(b)) return PTR_ERR(b);
1076 if ((__u32)pos >= b->size)
1077 return PTR_ERR(b);
1078 return b->items[pos];
1079 }
1080 int get_bucket_item_weight(int id, int pos) const {
1081 const crush_bucket *b = get_bucket(id);
1082 if (IS_ERR(b)) return PTR_ERR(b);
1083 return crush_get_bucket_item_weight(b, pos);
1084 }
1085 float get_bucket_item_weightf(int id, int pos) const {
1086 const crush_bucket *b = get_bucket(id);
1087 if (IS_ERR(b)) return 0;
1088 return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000;
1089 }
1090
1091 /* modifiers */
1092 int add_bucket(int bucketno, int alg, int hash, int type, int size,
1093 int *items, int *weights, int *idout) {
1094 if (alg == 0) {
1095 alg = get_default_bucket_alg();
1096 if (alg == 0)
1097 return -EINVAL;
1098 }
1099 crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items, weights);
1100 assert(b);
1101 return crush_add_bucket(crush, bucketno, b, idout);
1102 }
1103
1104 void finalize() {
1105 assert(crush);
1106 crush_finalize(crush);
1107 }
1108
1109 int update_device_class(CephContext *cct, int id, const string& class_name, const string& name);
1110 int device_class_clone(int original, int device_class, int *clone);
1111 bool class_is_in_use(int class_id);
1112 int populate_classes();
1113 int rebuild_roots_with_classes();
1114 /* remove unused roots generated for class devices */
1115 int trim_roots_with_class(bool unused);
1116 int cleanup_classes();
1117
1118 void start_choose_profile() {
1119 free(crush->choose_tries);
1120 /*
1121 * the original choose_total_tries value was off by one (it
1122 * counted "retries" and not "tries"). add one to alloc.
1123 */
1124 crush->choose_tries = (__u32 *)malloc(sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
1125 memset(crush->choose_tries, 0,
1126 sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
1127 }
1128 void stop_choose_profile() {
1129 free(crush->choose_tries);
1130 crush->choose_tries = 0;
1131 }
1132
1133 int get_choose_profile(__u32 **vec) {
1134 if (crush->choose_tries) {
1135 *vec = crush->choose_tries;
1136 return crush->choose_total_tries;
1137 }
1138 return 0;
1139 }
1140
1141
1142 void set_max_devices(int m) {
1143 crush->max_devices = m;
1144 }
1145
1146 int find_rule(int ruleset, int type, int size) const {
1147 if (!crush) return -1;
1148 return crush_find_rule(crush, ruleset, type, size);
1149 }
1150
1151 bool ruleset_exists(int const ruleset) const {
1152 for (size_t i = 0; i < crush->max_rules; ++i) {
1153 if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) {
1154 return true;
1155 }
1156 }
1157
1158 return false;
1159 }
1160
1161 /**
1162 * Return the lowest numbered ruleset of type `type`
1163 *
1164 * @returns a ruleset ID, or -1 if no matching rulesets found.
1165 */
1166 int find_first_ruleset(int type) const {
1167 int result = -1;
1168
1169 for (size_t i = 0; i < crush->max_rules; ++i) {
1170 if (crush->rules[i]
1171 && crush->rules[i]->mask.type == type
1172 && (crush->rules[i]->mask.ruleset < result || result == -1)) {
1173 result = crush->rules[i]->mask.ruleset;
1174 }
1175 }
1176
1177 return result;
1178 }
1179
1180 crush_choose_arg_map choose_args_get(uint64_t choose_args_index) const {
1181 auto i = choose_args.find(choose_args_index);
1182 if (i == choose_args.end()) {
1183 crush_choose_arg_map arg_map;
1184 arg_map.args = NULL;
1185 arg_map.size = 0;
1186 return arg_map;
1187 } else {
1188 return i->second;
1189 }
1190 }
1191
1192 void destroy_choose_args(crush_choose_arg_map arg_map) {
1193 for (__u32 i = 0; i < arg_map.size; i++) {
1194 crush_choose_arg *arg = &arg_map.args[i];
1195 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1196 crush_weight_set *weight_set = &arg->weight_set[j];
1197 free(weight_set->weights);
1198 }
1199 if (arg->weight_set)
1200 free(arg->weight_set);
1201 if (arg->ids)
1202 free(arg->ids);
1203 }
1204 free(arg_map.args);
1205 }
1206
1207 void choose_args_clear() {
1208 for (auto w : choose_args)
1209 destroy_choose_args(w.second);
1210 choose_args.clear();
1211 }
1212
1213 template<typename WeightVector>
1214 void do_rule(int rule, int x, vector<int>& out, int maxout,
1215 const WeightVector& weight,
1216 uint64_t choose_args_index) const {
1217 int rawout[maxout];
1218 char work[crush_work_size(crush, maxout)];
1219 crush_init_workspace(crush, work);
1220 crush_choose_arg_map arg_map = choose_args_get(choose_args_index);
1221 int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0],
1222 weight.size(), work, arg_map.args);
1223 if (numrep < 0)
1224 numrep = 0;
1225 out.resize(numrep);
1226 for (int i=0; i<numrep; i++)
1227 out[i] = rawout[i];
1228 }
1229
1230 int _choose_type_stack(
1231 CephContext *cct,
1232 const vector<pair<int,int>>& stack,
1233 const set<int>& overfull,
1234 const vector<int>& underfull,
1235 const vector<int>& orig,
1236 vector<int>::const_iterator& i,
1237 set<int>& used,
1238 vector<int> *pw) const;
1239
1240 int try_remap_rule(
1241 CephContext *cct,
1242 int rule,
1243 int maxout,
1244 const set<int>& overfull,
1245 const vector<int>& underfull,
1246 const vector<int>& orig,
1247 vector<int> *out) const;
1248
1249 bool check_crush_rule(int ruleset, int type, int size, ostream& ss) {
1250 assert(crush);
1251
1252 __u32 i;
1253 for (i = 0; i < crush->max_rules; i++) {
1254 if (crush->rules[i] &&
1255 crush->rules[i]->mask.ruleset == ruleset &&
1256 crush->rules[i]->mask.type == type) {
1257
1258 if (crush->rules[i]->mask.min_size <= size &&
1259 crush->rules[i]->mask.max_size >= size) {
1260 return true;
1261 } else if (size < crush->rules[i]->mask.min_size) {
1262 ss << "pool size is smaller than the crush rule min size";
1263 return false;
1264 } else {
1265 ss << "pool size is bigger than the crush rule max size";
1266 return false;
1267 }
1268 }
1269 }
1270
1271 return false;
1272 }
1273
1274 void encode(bufferlist &bl, uint64_t features) const;
1275 void decode(bufferlist::iterator &blp);
1276 void decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp);
1277 void dump(Formatter *f) const;
1278 void dump_rules(Formatter *f) const;
1279 void dump_rule(int ruleset, Formatter *f) const;
1280 void dump_tunables(Formatter *f) const;
1281 void dump_choose_args(Formatter *f) const;
1282 void list_rules(Formatter *f) const;
1283 void dump_tree(ostream *out, Formatter *f) const;
1284 void dump_tree(Formatter *f) const;
1285 static void generate_test_instances(list<CrushWrapper*>& o);
1286
1287 int _get_osd_pool_default_crush_replicated_ruleset(CephContext *cct,
1288 bool quiet);
1289 int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);
1290
1291 static bool is_valid_crush_name(const string& s);
1292 static bool is_valid_crush_loc(CephContext *cct,
1293 const map<string,string>& loc);
1294 };
1295 WRITE_CLASS_ENCODER_FEATURES(CrushWrapper)
1296
1297 #endif