]> git.proxmox.com Git - ceph.git/blob - ceph/src/crush/CrushWrapper.h
bump version to 15.2.4-pve1
[ceph.git] / ceph / src / crush / CrushWrapper.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef CEPH_CRUSH_WRAPPER_H
5 #define CEPH_CRUSH_WRAPPER_H
6
7 #include <stdlib.h>
8 #include <map>
9 #include <set>
10 #include <string>
11
12 #include <iosfwd>
13
14 #include "include/types.h"
15
16 extern "C" {
17 #include "crush.h"
18 #include "hash.h"
19 #include "mapper.h"
20 #include "builder.h"
21 }
22
23 #include "include/ceph_assert.h"
24 #include "include/err.h"
25 #include "include/encoding.h"
26 #include "include/mempool.h"
27
28 namespace ceph {
29 class Formatter;
30 }
31
32 namespace CrushTreeDumper {
33 typedef mempool::osdmap::map<int64_t,std::string> name_map_t;
34 }
35
36 WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's
37
38 inline void encode(const crush_rule_step &s, ceph::buffer::list &bl)
39 {
40 using ceph::encode;
41 encode(s.op, bl);
42 encode(s.arg1, bl);
43 encode(s.arg2, bl);
44 }
45 inline void decode(crush_rule_step &s, ceph::buffer::list::const_iterator &p)
46 {
47 using ceph::decode;
48 decode(s.op, p);
49 decode(s.arg1, p);
50 decode(s.arg2, p);
51 }
52
53 class CrushWrapper {
54 public:
55 // magic value used by OSDMap for a "default" fallback choose_args, used if
56 // the choose_arg_map passed to do_rule does not exist. if this also
57 // doesn't exist, fall back to canonical weights.
58 enum {
59 DEFAULT_CHOOSE_ARGS = -1
60 };
61
62 std::map<int32_t, std::string> type_map; /* bucket/device type names */
63 std::map<int32_t, std::string> name_map; /* bucket/device names */
64 std::map<int32_t, std::string> rule_name_map;
65
66 std::map<int32_t, int32_t> class_map; /* item id -> class id */
67 std::map<int32_t, std::string> class_name; /* class id -> class name */
68 std::map<std::string, int32_t> class_rname; /* class name -> class id */
69 std::map<int32_t, std::map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */
70 std::map<int64_t, crush_choose_arg_map> choose_args;
71
72 private:
73 struct crush_map *crush = nullptr;
74
75 bool have_uniform_rules = false;
76
77 /* reverse maps */
78 mutable bool have_rmaps = false;
79 mutable std::map<std::string, int> type_rmap, name_rmap, rule_name_rmap;
80 void build_rmaps() const {
81 if (have_rmaps) return;
82 build_rmap(type_map, type_rmap);
83 build_rmap(name_map, name_rmap);
84 build_rmap(rule_name_map, rule_name_rmap);
85 have_rmaps = true;
86 }
87 void build_rmap(const std::map<int, std::string> &f, std::map<std::string, int> &r) const {
88 r.clear();
89 for (auto p = f.begin(); p != f.end(); ++p)
90 r[p->second] = p->first;
91 }
92
93 public:
94 CrushWrapper(const CrushWrapper& other);
95 const CrushWrapper& operator=(const CrushWrapper& other);
96
97 CrushWrapper() {
98 create();
99 }
100 ~CrushWrapper() {
101 if (crush)
102 crush_destroy(crush);
103 choose_args_clear();
104 }
105
106 crush_map *get_crush_map() { return crush; }
107
108 /* building */
109 void create() {
110 if (crush)
111 crush_destroy(crush);
112 crush = crush_create();
113 choose_args_clear();
114 ceph_assert(crush);
115 have_rmaps = false;
116
117 set_tunables_default();
118 }
119
120 /**
121 * true if any rule has a rule id != its position in the array
122 *
123 * These indicate "ruleset" IDs that were created by older versions
124 * of Ceph. They are cleaned up in renumber_rules so that eventually
125 * we can remove the code for handling them.
126 */
127 bool has_legacy_rule_ids() const;
128
129 /**
130 * fix rules whose ruleid != ruleset
131 *
132 * These rules were created in older versions of Ceph. The concept
133 * of a ruleset no longer exists.
134 *
135 * Return a map of old ID -> new ID. Caller must update OSDMap
136 * to use new IDs.
137 */
138 std::map<int, int> renumber_rules();
139
140 /// true if any buckets that aren't straw2
141 bool has_non_straw2_buckets() const;
142
143 // tunables
144 void set_tunables_argonaut() {
145 crush->choose_local_tries = 2;
146 crush->choose_local_fallback_tries = 5;
147 crush->choose_total_tries = 19;
148 crush->chooseleaf_descend_once = 0;
149 crush->chooseleaf_vary_r = 0;
150 crush->chooseleaf_stable = 0;
151 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
152 }
153 void set_tunables_bobtail() {
154 crush->choose_local_tries = 0;
155 crush->choose_local_fallback_tries = 0;
156 crush->choose_total_tries = 50;
157 crush->chooseleaf_descend_once = 1;
158 crush->chooseleaf_vary_r = 0;
159 crush->chooseleaf_stable = 0;
160 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
161 }
162 void set_tunables_firefly() {
163 crush->choose_local_tries = 0;
164 crush->choose_local_fallback_tries = 0;
165 crush->choose_total_tries = 50;
166 crush->chooseleaf_descend_once = 1;
167 crush->chooseleaf_vary_r = 1;
168 crush->chooseleaf_stable = 0;
169 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
170 }
171 void set_tunables_hammer() {
172 crush->choose_local_tries = 0;
173 crush->choose_local_fallback_tries = 0;
174 crush->choose_total_tries = 50;
175 crush->chooseleaf_descend_once = 1;
176 crush->chooseleaf_vary_r = 1;
177 crush->chooseleaf_stable = 0;
178 crush->allowed_bucket_algs =
179 (1 << CRUSH_BUCKET_UNIFORM) |
180 (1 << CRUSH_BUCKET_LIST) |
181 (1 << CRUSH_BUCKET_STRAW) |
182 (1 << CRUSH_BUCKET_STRAW2);
183 }
184 void set_tunables_jewel() {
185 crush->choose_local_tries = 0;
186 crush->choose_local_fallback_tries = 0;
187 crush->choose_total_tries = 50;
188 crush->chooseleaf_descend_once = 1;
189 crush->chooseleaf_vary_r = 1;
190 crush->chooseleaf_stable = 1;
191 crush->allowed_bucket_algs =
192 (1 << CRUSH_BUCKET_UNIFORM) |
193 (1 << CRUSH_BUCKET_LIST) |
194 (1 << CRUSH_BUCKET_STRAW) |
195 (1 << CRUSH_BUCKET_STRAW2);
196 }
197
198 void set_tunables_legacy() {
199 set_tunables_argonaut();
200 crush->straw_calc_version = 0;
201 }
202 void set_tunables_optimal() {
203 set_tunables_jewel();
204 crush->straw_calc_version = 1;
205 }
206 void set_tunables_default() {
207 set_tunables_jewel();
208 crush->straw_calc_version = 1;
209 }
210
211 int get_choose_local_tries() const {
212 return crush->choose_local_tries;
213 }
214 void set_choose_local_tries(int n) {
215 crush->choose_local_tries = n;
216 }
217
218 int get_choose_local_fallback_tries() const {
219 return crush->choose_local_fallback_tries;
220 }
221 void set_choose_local_fallback_tries(int n) {
222 crush->choose_local_fallback_tries = n;
223 }
224
225 int get_choose_total_tries() const {
226 return crush->choose_total_tries;
227 }
228 void set_choose_total_tries(int n) {
229 crush->choose_total_tries = n;
230 }
231
232 int get_chooseleaf_descend_once() const {
233 return crush->chooseleaf_descend_once;
234 }
235 void set_chooseleaf_descend_once(int n) {
236 crush->chooseleaf_descend_once = !!n;
237 }
238
239 int get_chooseleaf_vary_r() const {
240 return crush->chooseleaf_vary_r;
241 }
242 void set_chooseleaf_vary_r(int n) {
243 crush->chooseleaf_vary_r = n;
244 }
245
246 int get_chooseleaf_stable() const {
247 return crush->chooseleaf_stable;
248 }
249 void set_chooseleaf_stable(int n) {
250 crush->chooseleaf_stable = n;
251 }
252
253 int get_straw_calc_version() const {
254 return crush->straw_calc_version;
255 }
256 void set_straw_calc_version(int n) {
257 crush->straw_calc_version = n;
258 }
259
260 unsigned get_allowed_bucket_algs() const {
261 return crush->allowed_bucket_algs;
262 }
263 void set_allowed_bucket_algs(unsigned n) {
264 crush->allowed_bucket_algs = n;
265 }
266
267 bool has_argonaut_tunables() const {
268 return
269 crush->choose_local_tries == 2 &&
270 crush->choose_local_fallback_tries == 5 &&
271 crush->choose_total_tries == 19 &&
272 crush->chooseleaf_descend_once == 0 &&
273 crush->chooseleaf_vary_r == 0 &&
274 crush->chooseleaf_stable == 0 &&
275 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
276 }
277 bool has_bobtail_tunables() const {
278 return
279 crush->choose_local_tries == 0 &&
280 crush->choose_local_fallback_tries == 0 &&
281 crush->choose_total_tries == 50 &&
282 crush->chooseleaf_descend_once == 1 &&
283 crush->chooseleaf_vary_r == 0 &&
284 crush->chooseleaf_stable == 0 &&
285 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
286 }
287 bool has_firefly_tunables() const {
288 return
289 crush->choose_local_tries == 0 &&
290 crush->choose_local_fallback_tries == 0 &&
291 crush->choose_total_tries == 50 &&
292 crush->chooseleaf_descend_once == 1 &&
293 crush->chooseleaf_vary_r == 1 &&
294 crush->chooseleaf_stable == 0 &&
295 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
296 }
297 bool has_hammer_tunables() const {
298 return
299 crush->choose_local_tries == 0 &&
300 crush->choose_local_fallback_tries == 0 &&
301 crush->choose_total_tries == 50 &&
302 crush->chooseleaf_descend_once == 1 &&
303 crush->chooseleaf_vary_r == 1 &&
304 crush->chooseleaf_stable == 0 &&
305 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
306 (1 << CRUSH_BUCKET_LIST) |
307 (1 << CRUSH_BUCKET_STRAW) |
308 (1 << CRUSH_BUCKET_STRAW2));
309 }
310 bool has_jewel_tunables() const {
311 return
312 crush->choose_local_tries == 0 &&
313 crush->choose_local_fallback_tries == 0 &&
314 crush->choose_total_tries == 50 &&
315 crush->chooseleaf_descend_once == 1 &&
316 crush->chooseleaf_vary_r == 1 &&
317 crush->chooseleaf_stable == 1 &&
318 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
319 (1 << CRUSH_BUCKET_LIST) |
320 (1 << CRUSH_BUCKET_STRAW) |
321 (1 << CRUSH_BUCKET_STRAW2));
322 }
323
324 bool has_optimal_tunables() const {
325 return has_jewel_tunables();
326 }
327 bool has_legacy_tunables() const {
328 return has_argonaut_tunables();
329 }
330
331 bool has_nondefault_tunables() const {
332 return
333 (crush->choose_local_tries != 2 ||
334 crush->choose_local_fallback_tries != 5 ||
335 crush->choose_total_tries != 19);
336 }
337 bool has_nondefault_tunables2() const {
338 return
339 crush->chooseleaf_descend_once != 0;
340 }
341 bool has_nondefault_tunables3() const {
342 return
343 crush->chooseleaf_vary_r != 0;
344 }
345 bool has_nondefault_tunables5() const {
346 return
347 crush->chooseleaf_stable != 0;
348 }
349
350 bool has_v2_rules() const;
351 bool has_v3_rules() const;
352 bool has_v4_buckets() const;
353 bool has_v5_rules() const;
354 bool has_choose_args() const; // any choose_args
355 bool has_incompat_choose_args() const; // choose_args that can't be made compat
356
357 bool is_v2_rule(unsigned ruleid) const;
358 bool is_v3_rule(unsigned ruleid) const;
359 bool is_v5_rule(unsigned ruleid) const;
360
361 std::string get_min_required_version() const {
362 if (has_v5_rules() || has_nondefault_tunables5())
363 return "jewel";
364 else if (has_v4_buckets())
365 return "hammer";
366 else if (has_nondefault_tunables3())
367 return "firefly";
368 else if (has_nondefault_tunables2() || has_nondefault_tunables())
369 return "bobtail";
370 else
371 return "argonaut";
372 }
373
374 // default bucket types
375 unsigned get_default_bucket_alg() const {
376 // in order of preference
377 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2))
378 return CRUSH_BUCKET_STRAW2;
379 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW))
380 return CRUSH_BUCKET_STRAW;
381 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE))
382 return CRUSH_BUCKET_TREE;
383 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST))
384 return CRUSH_BUCKET_LIST;
385 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM))
386 return CRUSH_BUCKET_UNIFORM;
387 return 0;
388 }
389
390 // bucket types
391 int get_num_type_names() const {
392 return type_map.size();
393 }
394 int get_max_type_id() const {
395 if (type_map.empty())
396 return 0;
397 return type_map.rbegin()->first;
398 }
399 int get_type_id(const std::string& name) const {
400 build_rmaps();
401 if (type_rmap.count(name))
402 return type_rmap[name];
403 return -1;
404 }
405 const char *get_type_name(int t) const {
406 auto p = type_map.find(t);
407 if (p != type_map.end())
408 return p->second.c_str();
409 return 0;
410 }
411 void set_type_name(int i, const std::string& name) {
412 type_map[i] = name;
413 if (have_rmaps)
414 type_rmap[name] = i;
415 }
416
417 // item/bucket names
418 bool name_exists(const std::string& name) const {
419 build_rmaps();
420 return name_rmap.count(name);
421 }
422 bool item_exists(int i) const {
423 return name_map.count(i);
424 }
425 int get_item_id(const std::string& name) const {
426 build_rmaps();
427 if (name_rmap.count(name))
428 return name_rmap[name];
429 return 0; /* hrm */
430 }
431 const char *get_item_name(int t) const {
432 std::map<int,std::string>::const_iterator p = name_map.find(t);
433 if (p != name_map.end())
434 return p->second.c_str();
435 return 0;
436 }
437 int set_item_name(int i, const std::string& name) {
438 if (!is_valid_crush_name(name))
439 return -EINVAL;
440 name_map[i] = name;
441 if (have_rmaps)
442 name_rmap[name] = i;
443 return 0;
444 }
445 void swap_names(int a, int b) {
446 std::string an = name_map[a];
447 std::string bn = name_map[b];
448 name_map[a] = bn;
449 name_map[b] = an;
450 if (have_rmaps) {
451 name_rmap[an] = b;
452 name_rmap[bn] = a;
453 }
454 }
455 int split_id_class(int i, int *idout, int *classout) const;
456
457 bool class_exists(const std::string& name) const {
458 return class_rname.count(name);
459 }
460 const char *get_class_name(int i) const {
461 auto p = class_name.find(i);
462 if (p != class_name.end())
463 return p->second.c_str();
464 return 0;
465 }
466 int get_class_id(const std::string& name) const {
467 auto p = class_rname.find(name);
468 if (p != class_rname.end())
469 return p->second;
470 else
471 return -EINVAL;
472 }
473 int remove_class_name(const std::string& name) {
474 auto p = class_rname.find(name);
475 if (p == class_rname.end())
476 return -ENOENT;
477 int class_id = p->second;
478 auto q = class_name.find(class_id);
479 if (q == class_name.end())
480 return -ENOENT;
481 class_rname.erase(name);
482 class_name.erase(class_id);
483 return 0;
484 }
485
486 int32_t _alloc_class_id() const;
487
488 int get_or_create_class_id(const std::string& name) {
489 int c = get_class_id(name);
490 if (c < 0) {
491 int i = _alloc_class_id();
492 class_name[i] = name;
493 class_rname[name] = i;
494 return i;
495 } else {
496 return c;
497 }
498 }
499
500 const char *get_item_class(int t) const {
501 std::map<int,int>::const_iterator p = class_map.find(t);
502 if (p == class_map.end())
503 return 0;
504 return get_class_name(p->second);
505 }
506 int get_item_class_id(int t) const {
507 auto p = class_map.find(t);
508 if (p == class_map.end())
509 return -ENOENT;
510 return p->second;
511 }
512 int set_item_class(int i, const std::string& name) {
513 if (!is_valid_crush_name(name))
514 return -EINVAL;
515 class_map[i] = get_or_create_class_id(name);
516 return 0;
517 }
518 int set_item_class(int i, int c) {
519 class_map[i] = c;
520 return c;
521 }
522 void get_devices_by_class(const std::string &name,
523 std::set<int> *devices) const {
524 ceph_assert(devices);
525 devices->clear();
526 if (!class_exists(name)) {
527 return;
528 }
529 auto cid = get_class_id(name);
530 for (auto& p : class_map) {
531 if (p.first >= 0 && p.second == cid) {
532 devices->insert(p.first);
533 }
534 }
535 }
536 void class_remove_item(int i) {
537 auto it = class_map.find(i);
538 if (it == class_map.end()) {
539 return;
540 }
541 class_map.erase(it);
542 }
543 int can_rename_item(const std::string& srcname,
544 const std::string& dstname,
545 std::ostream *ss) const;
546 int rename_item(const std::string& srcname,
547 const std::string& dstname,
548 std::ostream *ss);
549 int can_rename_bucket(const std::string& srcname,
550 const std::string& dstname,
551 std::ostream *ss) const;
552 int rename_bucket(const std::string& srcname,
553 const std::string& dstname,
554 std::ostream *ss);
555
556 // rule names
557 int rename_rule(const std::string& srcname,
558 const std::string& dstname,
559 std::ostream *ss);
560 bool rule_exists(std::string name) const {
561 build_rmaps();
562 return rule_name_rmap.count(name);
563 }
564 int get_rule_id(std::string name) const {
565 build_rmaps();
566 if (rule_name_rmap.count(name))
567 return rule_name_rmap[name];
568 return -ENOENT;
569 }
570 const char *get_rule_name(int t) const {
571 auto p = rule_name_map.find(t);
572 if (p != rule_name_map.end())
573 return p->second.c_str();
574 return 0;
575 }
576 void set_rule_name(int i, const std::string& name) {
577 rule_name_map[i] = name;
578 if (have_rmaps)
579 rule_name_rmap[name] = i;
580 }
581 bool is_shadow_item(int id) const {
582 const char *name = get_item_name(id);
583 return name && !is_valid_crush_name(name);
584 }
585
586
587 /**
588 * find tree nodes referenced by rules by a 'take' command
589 *
590 * Note that these may not be parentless roots.
591 */
592 void find_takes(std::set<int> *roots) const;
593 void find_takes_by_rule(int rule, std::set<int> *roots) const;
594
595 /**
596 * find tree roots
597 *
598 * These are parentless nodes in the map.
599 */
600 void find_roots(std::set<int> *roots) const;
601
602
603 /**
604 * find tree roots that contain shadow (device class) items only
605 */
606 void find_shadow_roots(std::set<int> *roots) const {
607 std::set<int> all;
608 find_roots(&all);
609 for (auto& p: all) {
610 if (is_shadow_item(p)) {
611 roots->insert(p);
612 }
613 }
614 }
615
616 /**
617 * find tree roots that are not shadow (device class) items
618 *
619 * These are parentless nodes in the map that are not shadow
620 * items for device classes.
621 */
622 void find_nonshadow_roots(std::set<int> *roots) const {
623 std::set<int> all;
624 find_roots(&all);
625 for (auto& p: all) {
626 if (!is_shadow_item(p)) {
627 roots->insert(p);
628 }
629 }
630 }
631
632 /**
633 * see if an item is contained within a subtree
634 *
635 * @param root haystack
636 * @param item needle
637 * @return true if the item is located beneath the given node
638 */
639 bool subtree_contains(int root, int item) const;
640
641 private:
642 /**
643 * search for an item in any bucket
644 *
645 * @param i item
646 * @return true if present
647 */
648 bool _search_item_exists(int i) const;
649 bool is_parent_of(int child, int p) const;
650 public:
651
652 /**
653 * see if item is located where we think it is
654 *
655 * This verifies that the given item is located at a particular
656 * location in the hierarchy. However, that check is imprecise; we
657 * are actually verifying that the most specific location key/value
658 * is correct. For example, if loc specifies that rack=foo and
659 * host=bar, it will verify that host=bar is correct; any placement
660 * above that level in the hierarchy is ignored. This matches the
661 * semantics for insert_item().
662 *
663 * @param cct cct
664 * @param item item id
665 * @param loc location to check (map of type to bucket names)
666 * @param weight optional pointer to weight of item at that location
667 * @return true if item is at specified location
668 */
669 bool check_item_loc(CephContext *cct, int item,
670 const std::map<std::string,std::string>& loc,
671 int *iweight);
672 bool check_item_loc(CephContext *cct, int item,
673 const std::map<std::string,std::string>& loc,
674 float *weight) {
675 int iweight;
676 bool ret = check_item_loc(cct, item, loc, &iweight);
677 if (weight)
678 *weight = (float)iweight / (float)0x10000;
679 return ret;
680 }
681
682
683 /**
684 * returns the (type, name) of the parent bucket of id
685 *
686 * FIXME: ambiguous for items that occur multiple times in the map
687 */
688 std::pair<std::string,std::string> get_immediate_parent(int id, int *ret = NULL) const;
689
690 int get_immediate_parent_id(int id, int *parent) const;
691
692 /**
693 * return ancestor of the given type, or 0 if none
694 * can pass in a specific crush **rule** to return ancestor from that rule only
695 * (parent is always a bucket and thus <0)
696 */
697 int get_parent_of_type(int id, int type, int rule = -1) const;
698
699 /**
700 * get the fully qualified location of a device by successively finding
701 * parents beginning at ID and ending at highest type number specified in
702 * the CRUSH map which assumes that if device foo is under device bar, the
703 * type_id of foo < bar where type_id is the integer specified in the CRUSH map
704 *
705 * returns the location in the form of (type=foo) where type is a type of bucket
706 * specified in the CRUSH map and foo is a name specified in the CRUSH map
707 */
708 std::map<std::string, std::string> get_full_location(int id) const;
709
710 /**
711 * return location map for a item, by name
712 */
713 int get_full_location(
714 const std::string& name,
715 std::map<std::string,std::string> *ploc);
716
717 /*
718 * identical to get_full_location(int id) although it returns the type/name
719 * pairs in the order they occur in the hierarchy.
720 *
721 * returns -ENOENT if id is not found.
722 */
723 int get_full_location_ordered(
724 int id,
725 std::vector<std::pair<std::string, std::string> >& path) const;
726
727 /*
728 * identical to get_full_location_ordered(int id, vector<pair<string, string> >& path),
729 * although it returns a concatenated string with the type/name pairs in descending
730 * hierarchical order with format key1=val1,key2=val2.
731 *
732 * returns the location in descending hierarchy as a string.
733 */
734 std::string get_full_location_ordered_string(int id) const;
735
736 /**
737 * returns (type_id, type) of all parent buckets between id and
738 * default, can be used to check for anomalous CRUSH maps
739 */
740 std::map<int, std::string> get_parent_hierarchy(int id) const;
741
742 /**
743 * enumerate immediate children of given node
744 *
745 * @param id parent bucket or device id
746 * @return number of items, or error
747 */
748 int get_children(int id, std::list<int> *children) const;
749 /**
750 * enumerate all children of given node
751 *
752 * @param id parent bucket or device id
753 * @return number of items, or error
754 */
755 int get_all_children(int id, std::set<int> *children) const;
756 void get_children_of_type(int id,
757 int type,
758 std::vector<int> *children,
759 bool exclude_shadow = true) const;
760 /**
761 * enumerate all subtrees by type
762 */
763 void get_subtree_of_type(int type, std::vector<int> *subtrees);
764
765
766 /**
767 * verify upmapping results.
768 * return 0 on success or a negative errno on error.
769 */
770 int verify_upmap(CephContext *cct,
771 int rule_id,
772 int pool_size,
773 const std::vector<int>& up);
774
775 /**
776 * enumerate leaves(devices) of given node
777 *
778 * @param name parent bucket name
779 * @return 0 on success or a negative errno on error.
780 */
781 int get_leaves(const std::string &name, std::set<int> *leaves) const;
782
783 private:
784 int _get_leaves(int id, std::list<int> *leaves) const; // worker
785
786 public:
787 /**
788 * insert an item into the map at a specific position
789 *
790 * Add an item as a specific location of the hierarchy.
791 * Specifically, we look for the most specific location constraint
792 * for which a bucket already exists, and then create intervening
793 * buckets beneath that in order to place the item.
794 *
795 * Note that any location specifiers *above* the most specific match
796 * are ignored. For example, if we specify that osd.12 goes in
797 * host=foo, rack=bar, and row=baz, and rack=bar is the most
798 * specific match, we will create host=foo beneath that point and
799 * put osd.12 inside it. However, we will not verify that rack=bar
800 * is beneath row=baz or move it.
801 *
802 * In short, we will build out a hierarchy, and move leaves around,
803 * but not adjust the hierarchy's internal structure. Yet.
804 *
805 * If the item is already present in the map, we will return EEXIST.
806 * If the location key/value pairs are nonsensical
807 * (rack=nameofdevice), or location specifies that do not attach us
808 * to any existing part of the hierarchy, we will return EINVAL.
809 *
810 * @param cct cct
811 * @param id item id
812 * @param weight item weight
813 * @param name item name
814 * @param loc location (map of type to bucket names)
815 * @param init_weight_sets initialize weight-set weights to weight (vs 0)
816 * @return 0 for success, negative on error
817 */
818 int insert_item(CephContext *cct, int id, float weight, std::string name,
819 const std::map<std::string,std::string>& loc,
820 bool init_weight_sets=true);
821
822 /**
823 * move a bucket in the hierarchy to the given location
824 *
825 * This has the same location and ancestor creation behavior as
826 * insert_item(), but will relocate the specified existing bucket.
827 *
828 * @param cct cct
829 * @param id bucket id
830 * @param loc location (map of type to bucket names)
831 * @return 0 for success, negative on error
832 */
833 int move_bucket(CephContext *cct, int id, const std::map<std::string,std::string>& loc);
834
835 /**
836 * swap bucket contents of two buckets without touching bucket ids
837 *
838 * @param cct cct
839 * @param src bucket a
840 * @param dst bucket b
841 * @return 0 for success, negative on error
842 */
843 int swap_bucket(CephContext *cct, int src, int dst);
844
845 /**
846 * add a link to an existing bucket in the hierarchy to the new location
847 *
848 * This has the same location and ancestor creation behavior as
849 * insert_item(), but will add a new link to the specified existing
850 * bucket.
851 *
852 * @param cct cct
853 * @param id bucket id
854 * @param loc location (map of type to bucket names)
855 * @return 0 for success, negative on error
856 */
857 int link_bucket(CephContext *cct, int id,
858 const std::map<std::string,std::string>& loc);
859
860 /**
861 * add or update an item's position in the map
862 *
863 * This is analogous to insert_item, except we will move an item if
864 * it is already present.
865 *
866 * @param cct cct
867 * @param id item id
868 * @param weight item weight
869 * @param name item name
870 * @param loc location (map of type to bucket names)
871 * @return 0 for no change, 1 for successful change, negative on error
872 */
873 int update_item(CephContext *cct, int id, float weight, std::string name,
874 const std::map<std::string, std::string>& loc);
875
876 /**
877 * create or move an item, but do not adjust its weight if it already exists
878 *
879 * @param cct cct
880 * @param item item id
881 * @param weight initial item weight (if we need to create it)
882 * @param name item name
883 * @param loc location (map of type to bucket names)
884 * @param init_weight_sets initialize weight-set values to weight (vs 0)
885 * @return 0 for no change, 1 for successful change, negative on error
886 */
887 int create_or_move_item(CephContext *cct, int item, float weight,
888 std::string name,
889 const std::map<std::string,std::string>& loc,
890 bool init_weight_sets=true);
891
892 /**
893 * remove all instances of an item from the map
894 *
895 * @param cct cct
896 * @param id item id to remove
897 * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty)
898 * @return 0 on success, negative on error
899 */
900 int remove_item(CephContext *cct, int id, bool unlink_only);
901
902 /**
903 * recursively remove buckets starting at item and stop removing
904 * when a bucket is in use.
905 *
906 * @param item id to remove
907 * @return 0 on success, negative on error
908 */
909 int remove_root(CephContext *cct, int item);
910
911 /**
912 * remove all instances of an item nested beneath a certain point from the map
913 *
914 * @param cct cct
915 * @param id item id to remove
916 * @param ancestor ancestor item id under which to search for id
917 * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty)
918 * @return 0 on success, negative on error
919 */
920 private:
921 bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only);
922 int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
923 bool _bucket_is_in_use(int id);
924 public:
925 int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
926
927 /**
928 * calculate the locality/distance from a given id to a crush location map
929 *
930 * Specifically, we look for the lowest-valued type for which the
931 * location of id matches that described in loc.
932 *
933 * @param cct cct
934 * @param id the existing id in the map
935 * @param loc a set of key=value pairs describing a location in the hierarchy
936 */
937 int get_common_ancestor_distance(CephContext *cct, int id,
938 const std::multimap<std::string,std::string>& loc) const;
939
940 /**
941 * parse a set of key/value pairs out of a string vector
942 *
943 * These are used to describe a location in the CRUSH hierarchy.
944 *
945 * @param args list of strings (each key= or key=value)
946 * @param ploc pointer to a resulting location map or multimap
947 */
948 static int parse_loc_map(const std::vector<std::string>& args,
949 std::map<std::string,std::string> *ploc);
950 static int parse_loc_multimap(const std::vector<std::string>& args,
951 std::multimap<std::string,std::string> *ploc);
952
953
954 /**
955 * get an item's weight
956 *
957 * Will return the weight for the first instance it finds.
958 *
959 * @param id item id to check
960 * @return weight of item
961 */
962 int get_item_weight(int id) const;
963 float get_item_weightf(int id) const {
964 return (float)get_item_weight(id) / (float)0x10000;
965 }
966 int get_item_weight_in_loc(int id,
967 const std::map<std::string, std::string> &loc);
968 float get_item_weightf_in_loc(int id,
969 const std::map<std::string, std::string> &loc) {
970 return (float)get_item_weight_in_loc(id, loc) / (float)0x10000;
971 }
972
973 int validate_weightf(float weight) {
974 uint64_t iweight = weight * 0x10000;
975 if (iweight > static_cast<uint64_t>(std::numeric_limits<int>::max())) {
976 return -EOVERFLOW;
977 }
978 return 0;
979 }
980 int adjust_item_weight(CephContext *cct, int id, int weight,
981 bool update_weight_sets=true);
982 int adjust_item_weightf(CephContext *cct, int id, float weight,
983 bool update_weight_sets=true) {
984 int r = validate_weightf(weight);
985 if (r < 0) {
986 return r;
987 }
988 return adjust_item_weight(cct, id, (int)(weight * (float)0x10000),
989 update_weight_sets);
990 }
991 int adjust_item_weight_in_bucket(CephContext *cct, int id, int weight,
992 int bucket_id,
993 bool update_weight_sets);
994 int adjust_item_weight_in_loc(CephContext *cct, int id, int weight,
995 const std::map<std::string,std::string>& loc,
996 bool update_weight_sets=true);
997 int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight,
998 const std::map<std::string,std::string>& loc,
999 bool update_weight_sets=true) {
1000 int r = validate_weightf(weight);
1001 if (r < 0) {
1002 return r;
1003 }
1004 return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000),
1005 loc, update_weight_sets);
1006 }
1007 void reweight(CephContext *cct);
1008 void reweight_bucket(crush_bucket *b,
1009 crush_choose_arg_map& arg_map,
1010 std::vector<uint32_t> *weightv);
1011
1012 int adjust_subtree_weight(CephContext *cct, int id, int weight,
1013 bool update_weight_sets=true);
1014 int adjust_subtree_weightf(CephContext *cct, int id, float weight,
1015 bool update_weight_sets=true) {
1016 int r = validate_weightf(weight);
1017 if (r < 0) {
1018 return r;
1019 }
1020 return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000),
1021 update_weight_sets);
1022 }
1023
1024 /// check if item id is present in the map hierarchy
1025 bool check_item_present(int id) const;
1026
1027
1028 /*** devices ***/
1029 int get_max_devices() const {
1030 if (!crush) return 0;
1031 return crush->max_devices;
1032 }
1033
1034
1035 /*** rules ***/
1036 private:
1037 crush_rule *get_rule(unsigned ruleno) const {
1038 if (!crush) return (crush_rule *)(-ENOENT);
1039 if (ruleno >= crush->max_rules)
1040 return 0;
1041 return crush->rules[ruleno];
1042 }
1043 crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const {
1044 crush_rule *n = get_rule(ruleno);
1045 if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL);
1046 if (step >= n->len) return (crush_rule_step *)(-EINVAL);
1047 return &n->steps[step];
1048 }
1049
1050 public:
1051 /* accessors */
1052 int get_max_rules() const {
1053 if (!crush) return 0;
1054 return crush->max_rules;
1055 }
1056 bool rule_exists(unsigned ruleno) const {
1057 if (!crush) return false;
1058 if (ruleno < crush->max_rules &&
1059 crush->rules[ruleno] != NULL)
1060 return true;
1061 return false;
1062 }
1063 bool rule_has_take(unsigned ruleno, int take) const {
1064 if (!crush) return false;
1065 crush_rule *rule = get_rule(ruleno);
1066 for (unsigned i = 0; i < rule->len; ++i) {
1067 if (rule->steps[i].op == CRUSH_RULE_TAKE &&
1068 rule->steps[i].arg1 == take) {
1069 return true;
1070 }
1071 }
1072 return false;
1073 }
1074 int get_rule_len(unsigned ruleno) const {
1075 crush_rule *r = get_rule(ruleno);
1076 if (IS_ERR(r)) return PTR_ERR(r);
1077 return r->len;
1078 }
1079 int get_rule_mask_ruleset(unsigned ruleno) const {
1080 crush_rule *r = get_rule(ruleno);
1081 if (IS_ERR(r)) return -1;
1082 return r->mask.ruleset;
1083 }
1084 int get_rule_mask_type(unsigned ruleno) const {
1085 crush_rule *r = get_rule(ruleno);
1086 if (IS_ERR(r)) return -1;
1087 return r->mask.type;
1088 }
1089 int get_rule_mask_min_size(unsigned ruleno) const {
1090 crush_rule *r = get_rule(ruleno);
1091 if (IS_ERR(r)) return -1;
1092 return r->mask.min_size;
1093 }
1094 int get_rule_mask_max_size(unsigned ruleno) const {
1095 crush_rule *r = get_rule(ruleno);
1096 if (IS_ERR(r)) return -1;
1097 return r->mask.max_size;
1098 }
1099 int get_rule_op(unsigned ruleno, unsigned step) const {
1100 crush_rule_step *s = get_rule_step(ruleno, step);
1101 if (IS_ERR(s)) return PTR_ERR(s);
1102 return s->op;
1103 }
1104 int get_rule_arg1(unsigned ruleno, unsigned step) const {
1105 crush_rule_step *s = get_rule_step(ruleno, step);
1106 if (IS_ERR(s)) return PTR_ERR(s);
1107 return s->arg1;
1108 }
1109 int get_rule_arg2(unsigned ruleno, unsigned step) const {
1110 crush_rule_step *s = get_rule_step(ruleno, step);
1111 if (IS_ERR(s)) return PTR_ERR(s);
1112 return s->arg2;
1113 }
1114
1115 private:
1116 float _get_take_weight_osd_map(int root, std::map<int,float> *pmap) const;
1117 void _normalize_weight_map(float sum, const std::map<int,float>& m,
1118 std::map<int,float> *pmap) const;
1119
1120 public:
1121 /**
1122 * calculate a map of osds to weights for a given rule
1123 *
1124 * Generate a map of which OSDs get how much relative weight for a
1125 * given rule.
1126 *
1127 * @param ruleno [in] rule id
1128 * @param pmap [out] map of osd to weight
1129 * @return 0 for success, or negative error code
1130 */
1131 int get_rule_weight_osd_map(unsigned ruleno, std::map<int,float> *pmap) const;
1132
1133 /**
1134 * calculate a map of osds to weights for a given starting root
1135 *
1136 * Generate a map of which OSDs get how much relative weight for a
1137 * given starting root
1138 *
1139 * @param root node
1140 * @param pmap [out] map of osd to weight
1141 * @return 0 for success, or negative error code
1142 */
1143 int get_take_weight_osd_map(int root, std::map<int,float> *pmap) const;
1144
1145 /* modifiers */
1146
1147 int add_rule(int ruleno, int len, int type, int minsize, int maxsize) {
1148 if (!crush) return -ENOENT;
1149 crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize);
1150 ceph_assert(n);
1151 ruleno = crush_add_rule(crush, n, ruleno);
1152 return ruleno;
1153 }
1154 int set_rule_mask_max_size(unsigned ruleno, int max_size) {
1155 crush_rule *r = get_rule(ruleno);
1156 if (IS_ERR(r)) return -1;
1157 return r->mask.max_size = max_size;
1158 }
1159 int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) {
1160 if (!crush) return -ENOENT;
1161 crush_rule *n = get_rule(ruleno);
1162 if (!n) return -1;
1163 crush_rule_set_step(n, step, op, arg1, arg2);
1164 return 0;
1165 }
1166 int set_rule_step_take(unsigned ruleno, unsigned step, int val) {
1167 return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0);
1168 }
1169 int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) {
1170 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0);
1171 }
1172 int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) {
1173 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0);
1174 }
1175 int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) {
1176 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0);
1177 }
1178 int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) {
1179 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0);
1180 }
1181 int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) {
1182 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0);
1183 }
1184 int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) {
1185 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0);
1186 }
1187 int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) {
1188 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type);
1189 }
1190 int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) {
1191 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type);
1192 }
1193 int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) {
1194 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type);
1195 }
1196 int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) {
1197 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type);
1198 }
1199 int set_rule_step_emit(unsigned ruleno, unsigned step) {
1200 return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
1201 }
1202
1203 int add_simple_rule(
1204 std::string name, std::string root_name, std::string failure_domain_type,
1205 std::string device_class, std::string mode, int rule_type,
1206 std::ostream *err = 0);
1207
1208 /**
1209 * @param rno rule[set] id to use, -1 to pick the lowest available
1210 */
1211 int add_simple_rule_at(
1212 std::string name, std::string root_name,
1213 std::string failure_domain_type, std::string device_class, std::string mode,
1214 int rule_type, int rno, std::ostream *err = 0);
1215
1216 int remove_rule(int ruleno);
1217
1218
1219 /** buckets **/
1220 const crush_bucket *get_bucket(int id) const {
1221 if (!crush)
1222 return (crush_bucket *)(-EINVAL);
1223 unsigned int pos = (unsigned int)(-1 - id);
1224 unsigned int max_buckets = crush->max_buckets;
1225 if (pos >= max_buckets)
1226 return (crush_bucket *)(-ENOENT);
1227 crush_bucket *ret = crush->buckets[pos];
1228 if (ret == NULL)
1229 return (crush_bucket *)(-ENOENT);
1230 return ret;
1231 }
1232 private:
1233 crush_bucket *get_bucket(int id) {
1234 if (!crush)
1235 return (crush_bucket *)(-EINVAL);
1236 unsigned int pos = (unsigned int)(-1 - id);
1237 unsigned int max_buckets = crush->max_buckets;
1238 if (pos >= max_buckets)
1239 return (crush_bucket *)(-ENOENT);
1240 crush_bucket *ret = crush->buckets[pos];
1241 if (ret == NULL)
1242 return (crush_bucket *)(-ENOENT);
1243 return ret;
1244 }
1245 /**
1246 * detach a bucket from its parent and adjust the parent weight
1247 *
1248 * returns the weight of the detached bucket
1249 **/
1250 int detach_bucket(CephContext *cct, int item);
1251
1252 int get_new_bucket_id();
1253
1254 public:
1255 int get_max_buckets() const {
1256 if (!crush) return -EINVAL;
1257 return crush->max_buckets;
1258 }
1259 int get_next_bucket_id() const {
1260 if (!crush) return -EINVAL;
1261 return crush_get_next_bucket_id(crush);
1262 }
1263 bool bucket_exists(int id) const {
1264 const crush_bucket *b = get_bucket(id);
1265 if (IS_ERR(b))
1266 return false;
1267 return true;
1268 }
1269 int get_bucket_weight(int id) const {
1270 const crush_bucket *b = get_bucket(id);
1271 if (IS_ERR(b)) return PTR_ERR(b);
1272 return b->weight;
1273 }
1274 float get_bucket_weightf(int id) const {
1275 const crush_bucket *b = get_bucket(id);
1276 if (IS_ERR(b)) return 0;
1277 return b->weight / (float)0x10000;
1278 }
1279 int get_bucket_type(int id) const {
1280 const crush_bucket *b = get_bucket(id);
1281 if (IS_ERR(b)) return PTR_ERR(b);
1282 return b->type;
1283 }
1284 int get_bucket_alg(int id) const {
1285 const crush_bucket *b = get_bucket(id);
1286 if (IS_ERR(b)) return PTR_ERR(b);
1287 return b->alg;
1288 }
1289 int get_bucket_hash(int id) const {
1290 const crush_bucket *b = get_bucket(id);
1291 if (IS_ERR(b)) return PTR_ERR(b);
1292 return b->hash;
1293 }
1294 int get_bucket_size(int id) const {
1295 const crush_bucket *b = get_bucket(id);
1296 if (IS_ERR(b)) return PTR_ERR(b);
1297 return b->size;
1298 }
1299 int get_bucket_item(int id, int pos) const {
1300 const crush_bucket *b = get_bucket(id);
1301 if (IS_ERR(b)) return PTR_ERR(b);
1302 if ((__u32)pos >= b->size)
1303 return PTR_ERR(b);
1304 return b->items[pos];
1305 }
1306 int get_bucket_item_weight(int id, int pos) const {
1307 const crush_bucket *b = get_bucket(id);
1308 if (IS_ERR(b)) return PTR_ERR(b);
1309 return crush_get_bucket_item_weight(b, pos);
1310 }
1311 float get_bucket_item_weightf(int id, int pos) const {
1312 const crush_bucket *b = get_bucket(id);
1313 if (IS_ERR(b)) return 0;
1314 return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000;
1315 }
1316
1317 /* modifiers */
1318 int add_bucket(int bucketno, int alg, int hash, int type, int size,
1319 int *items, int *weights, int *idout);
1320 int bucket_add_item(crush_bucket *bucket, int item, int weight);
1321 int bucket_remove_item(struct crush_bucket *bucket, int item);
1322 int bucket_adjust_item_weight(
1323 CephContext *cct, struct crush_bucket *bucket, int item, int weight,
1324 bool adjust_weight_sets);
1325
1326 void finalize() {
1327 ceph_assert(crush);
1328 crush_finalize(crush);
1329 if (!name_map.empty() &&
1330 name_map.rbegin()->first >= crush->max_devices) {
1331 crush->max_devices = name_map.rbegin()->first + 1;
1332 }
1333 have_uniform_rules = !has_legacy_rule_ids();
1334 }
1335 int bucket_set_alg(int id, int alg);
1336
1337 int update_device_class(int id, const std::string& class_name,
1338 const std::string& name, std::ostream *ss);
1339 int remove_device_class(CephContext *cct, int id, std::ostream *ss);
1340 int device_class_clone(
1341 int original, int device_class,
1342 const std::map<int32_t, std::map<int32_t, int32_t>>& old_class_bucket,
1343 const std::set<int32_t>& used_ids,
1344 int *clone,
1345 std::map<int, std::map<int,std::vector<int>>> *cmap_item_weight);
1346 bool class_is_in_use(int class_id, std::ostream *ss = nullptr);
1347 int rename_class(const std::string& srcname, const std::string& dstname);
1348 int populate_classes(
1349 const std::map<int32_t, std::map<int32_t, int32_t>>& old_class_bucket);
1350 int get_rules_by_class(const std::string &class_name, std::set<int> *rules);
1351 int get_rules_by_osd(int osd, std::set<int> *rules);
1352 bool _class_is_dead(int class_id);
1353 void cleanup_dead_classes();
1354 int rebuild_roots_with_classes(CephContext *cct);
1355 /* remove unused roots generated for class devices */
1356 int trim_roots_with_class(CephContext *cct);
1357
1358 int reclassify(
1359 CephContext *cct,
1360 std::ostream& out,
1361 const std::map<std::string,std::string>& classify_root,
1362 const std::map<std::string,std::pair<std::string,std::string>>& classify_bucket
1363 );
1364
1365 int set_subtree_class(const std::string& name, const std::string& class_name);
1366
1367 void start_choose_profile() {
1368 free(crush->choose_tries);
1369 /*
1370 * the original choose_total_tries value was off by one (it
1371 * counted "retries" and not "tries"). add one to alloc.
1372 */
1373 crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries),
1374 (crush->choose_total_tries + 1));
1375 memset(crush->choose_tries, 0,
1376 sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
1377 }
1378 void stop_choose_profile() {
1379 free(crush->choose_tries);
1380 crush->choose_tries = 0;
1381 }
1382
1383 int get_choose_profile(__u32 **vec) {
1384 if (crush->choose_tries) {
1385 *vec = crush->choose_tries;
1386 return crush->choose_total_tries;
1387 }
1388 return 0;
1389 }
1390
1391
1392 void set_max_devices(int m) {
1393 crush->max_devices = m;
1394 }
1395
1396 int find_rule(int ruleset, int type, int size) const {
1397 if (!crush) return -1;
1398 if (have_uniform_rules &&
1399 ruleset < (int)crush->max_rules &&
1400 crush->rules[ruleset] &&
1401 crush->rules[ruleset]->mask.type == type &&
1402 crush->rules[ruleset]->mask.min_size <= size &&
1403 crush->rules[ruleset]->mask.max_size >= size) {
1404 return ruleset;
1405 }
1406 return crush_find_rule(crush, ruleset, type, size);
1407 }
1408
1409 bool ruleset_exists(const int ruleset) const {
1410 for (size_t i = 0; i < crush->max_rules; ++i) {
1411 if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) {
1412 return true;
1413 }
1414 }
1415
1416 return false;
1417 }
1418
1419 /**
1420 * Return the lowest numbered ruleset of type `type`
1421 *
1422 * @returns a ruleset ID, or -1 if no matching rules found.
1423 */
1424 int find_first_ruleset(int type) const {
1425 int result = -1;
1426
1427 for (size_t i = 0; i < crush->max_rules; ++i) {
1428 if (crush->rules[i]
1429 && crush->rules[i]->mask.type == type
1430 && (crush->rules[i]->mask.ruleset < result || result == -1)) {
1431 result = crush->rules[i]->mask.ruleset;
1432 }
1433 }
1434
1435 return result;
1436 }
1437
1438 bool have_choose_args(int64_t choose_args_index) const {
1439 return choose_args.count(choose_args_index);
1440 }
1441
1442 crush_choose_arg_map choose_args_get_with_fallback(
1443 int64_t choose_args_index) const {
1444 auto i = choose_args.find(choose_args_index);
1445 if (i == choose_args.end()) {
1446 i = choose_args.find(DEFAULT_CHOOSE_ARGS);
1447 }
1448 if (i == choose_args.end()) {
1449 crush_choose_arg_map arg_map;
1450 arg_map.args = NULL;
1451 arg_map.size = 0;
1452 return arg_map;
1453 } else {
1454 return i->second;
1455 }
1456 }
1457 crush_choose_arg_map choose_args_get(int64_t choose_args_index) const {
1458 auto i = choose_args.find(choose_args_index);
1459 if (i == choose_args.end()) {
1460 crush_choose_arg_map arg_map;
1461 arg_map.args = NULL;
1462 arg_map.size = 0;
1463 return arg_map;
1464 } else {
1465 return i->second;
1466 }
1467 }
1468
1469 void destroy_choose_args(crush_choose_arg_map arg_map) {
1470 for (__u32 i = 0; i < arg_map.size; i++) {
1471 crush_choose_arg *arg = &arg_map.args[i];
1472 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
1473 crush_weight_set *weight_set = &arg->weight_set[j];
1474 free(weight_set->weights);
1475 }
1476 if (arg->weight_set)
1477 free(arg->weight_set);
1478 if (arg->ids)
1479 free(arg->ids);
1480 }
1481 free(arg_map.args);
1482 }
1483
1484 bool create_choose_args(int64_t id, int positions) {
1485 if (choose_args.count(id))
1486 return false;
1487 ceph_assert(positions);
1488 auto &cmap = choose_args[id];
1489 cmap.args = static_cast<crush_choose_arg*>(calloc(sizeof(crush_choose_arg),
1490 crush->max_buckets));
1491 cmap.size = crush->max_buckets;
1492 for (int bidx=0; bidx < crush->max_buckets; ++bidx) {
1493 crush_bucket *b = crush->buckets[bidx];
1494 auto &carg = cmap.args[bidx];
1495 carg.ids = NULL;
1496 carg.ids_size = 0;
1497 if (b && b->alg == CRUSH_BUCKET_STRAW2) {
1498 crush_bucket_straw2 *sb = reinterpret_cast<crush_bucket_straw2*>(b);
1499 carg.weight_set_positions = positions;
1500 carg.weight_set = static_cast<crush_weight_set*>(calloc(sizeof(crush_weight_set),
1501 carg.weight_set_positions));
1502 // initialize with canonical weights
1503 for (int pos = 0; pos < positions; ++pos) {
1504 carg.weight_set[pos].size = b->size;
1505 carg.weight_set[pos].weights = (__u32*)calloc(4, b->size);
1506 for (unsigned i = 0; i < b->size; ++i) {
1507 carg.weight_set[pos].weights[i] = sb->item_weights[i];
1508 }
1509 }
1510 } else {
1511 carg.weight_set = NULL;
1512 carg.weight_set_positions = 0;
1513 }
1514 }
1515 return true;
1516 }
1517
1518 void rm_choose_args(int64_t id) {
1519 auto p = choose_args.find(id);
1520 if (p != choose_args.end()) {
1521 destroy_choose_args(p->second);
1522 choose_args.erase(p);
1523 }
1524 }
1525
1526 void choose_args_clear() {
1527 for (auto w : choose_args)
1528 destroy_choose_args(w.second);
1529 choose_args.clear();
1530 }
1531
1532 // remove choose_args for buckets that no longer exist, create them for new buckets
1533 void update_choose_args(CephContext *cct);
1534
1535 // adjust choose_args_map weight, preserving the hierarchical summation
1536 // property. used by callers optimizing layouts by tweaking weights.
1537 int _choose_args_adjust_item_weight_in_bucket(
1538 CephContext *cct,
1539 crush_choose_arg_map cmap,
1540 int bucketid,
1541 int id,
1542 const std::vector<int>& weight,
1543 std::ostream *ss);
1544 int choose_args_adjust_item_weight(
1545 CephContext *cct,
1546 crush_choose_arg_map cmap,
1547 int id, const std::vector<int>& weight,
1548 std::ostream *ss);
1549 int choose_args_adjust_item_weightf(
1550 CephContext *cct,
1551 crush_choose_arg_map cmap,
1552 int id, const std::vector<double>& weightf,
1553 std::ostream *ss) {
1554 std::vector<int> weight(weightf.size());
1555 for (unsigned i = 0; i < weightf.size(); ++i) {
1556 weight[i] = (int)(weightf[i] * (double)0x10000);
1557 }
1558 return choose_args_adjust_item_weight(cct, cmap, id, weight, ss);
1559 }
1560
1561 int get_choose_args_positions(crush_choose_arg_map cmap) {
1562 // infer positions from other buckets
1563 for (unsigned j = 0; j < cmap.size; ++j) {
1564 if (cmap.args[j].weight_set_positions) {
1565 return cmap.args[j].weight_set_positions;
1566 }
1567 }
1568 return 1;
1569 }
1570
1571 template<typename WeightVector>
1572 void do_rule(int rule, int x, std::vector<int>& out, int maxout,
1573 const WeightVector& weight,
1574 uint64_t choose_args_index) const {
1575 int rawout[maxout];
1576 char work[crush_work_size(crush, maxout)];
1577 crush_init_workspace(crush, work);
1578 crush_choose_arg_map arg_map = choose_args_get_with_fallback(
1579 choose_args_index);
1580 int numrep = crush_do_rule(crush, rule, x, rawout, maxout,
1581 std::data(weight), std::size(weight),
1582 work, arg_map.args);
1583 if (numrep < 0)
1584 numrep = 0;
1585 out.resize(numrep);
1586 for (int i=0; i<numrep; i++)
1587 out[i] = rawout[i];
1588 }
1589
1590 int _choose_type_stack(
1591 CephContext *cct,
1592 const std::vector<std::pair<int,int>>& stack,
1593 const std::set<int>& overfull,
1594 const std::vector<int>& underfull,
1595 const std::vector<int>& more_underfull,
1596 const std::vector<int>& orig,
1597 std::vector<int>::const_iterator& i,
1598 std::set<int>& used,
1599 std::vector<int> *pw,
1600 int root_bucket,
1601 int rule) const;
1602
1603 int try_remap_rule(
1604 CephContext *cct,
1605 int rule,
1606 int maxout,
1607 const std::set<int>& overfull,
1608 const std::vector<int>& underfull,
1609 const std::vector<int>& more_underfull,
1610 const std::vector<int>& orig,
1611 std::vector<int> *out) const;
1612
1613 bool check_crush_rule(int ruleset, int type, int size, std::ostream& ss) {
1614 ceph_assert(crush);
1615
1616 __u32 i;
1617 for (i = 0; i < crush->max_rules; i++) {
1618 if (crush->rules[i] &&
1619 crush->rules[i]->mask.ruleset == ruleset &&
1620 crush->rules[i]->mask.type == type) {
1621
1622 if (crush->rules[i]->mask.min_size <= size &&
1623 crush->rules[i]->mask.max_size >= size) {
1624 return true;
1625 } else if (size < crush->rules[i]->mask.min_size) {
1626 ss << "pool size is smaller than the crush rule min size";
1627 return false;
1628 } else {
1629 ss << "pool size is bigger than the crush rule max size";
1630 return false;
1631 }
1632 }
1633 }
1634
1635 return false;
1636 }
1637
1638 void encode(ceph::buffer::list &bl, uint64_t features) const;
1639 void decode(ceph::buffer::list::const_iterator &blp);
1640 void decode_crush_bucket(crush_bucket** bptr,
1641 ceph::buffer::list::const_iterator &blp);
1642 void dump(ceph::Formatter *f) const;
1643 void dump_rules(ceph::Formatter *f) const;
1644 void dump_rule(int ruleset, ceph::Formatter *f) const;
1645 void dump_tunables(ceph::Formatter *f) const;
1646 void dump_choose_args(ceph::Formatter *f) const;
1647 void list_rules(ceph::Formatter *f) const;
1648 void list_rules(std::ostream *ss) const;
1649 void dump_tree(std::ostream *out,
1650 ceph::Formatter *f,
1651 const CrushTreeDumper::name_map_t& ws,
1652 bool show_shadow = false) const;
1653 void dump_tree(std::ostream *out, ceph::Formatter *f) {
1654 dump_tree(out, f, CrushTreeDumper::name_map_t());
1655 }
1656 void dump_tree(ceph::Formatter *f,
1657 const CrushTreeDumper::name_map_t& ws) const;
1658 static void generate_test_instances(std::list<CrushWrapper*>& o);
1659
1660 int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);
1661
1662 static bool is_valid_crush_name(const std::string& s);
1663 static bool is_valid_crush_loc(CephContext *cct,
1664 const std::map<std::string,std::string>& loc);
1665 };
1666 WRITE_CLASS_ENCODER_FEATURES(CrushWrapper)
1667
1668 #endif