]> git.proxmox.com Git - ceph.git/blob - ceph/src/crush/CrushWrapper.h
update sources to v12.2.1
[ceph.git] / ceph / src / crush / CrushWrapper.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef CEPH_CRUSH_WRAPPER_H
5 #define CEPH_CRUSH_WRAPPER_H
6
7 #include <stdlib.h>
8 #include <map>
9 #include <set>
10 #include <string>
11
12 #include <iosfwd>
13
14 #include "include/types.h"
15
16 extern "C" {
17 #include "crush.h"
18 #include "hash.h"
19 #include "mapper.h"
20 #include "builder.h"
21 }
22
23 #include "include/assert.h"
24 #include "include/err.h"
25 #include "include/encoding.h"
26 #include "include/mempool.h"
27
28 #include "common/Mutex.h"
29
30 #define BUG_ON(x) assert(!(x))
31
32 namespace ceph {
33 class Formatter;
34 }
35
36 namespace CrushTreeDumper {
37 typedef mempool::osdmap::map<int64_t,string> name_map_t;
38 }
39
40 WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's
41
42 inline static void encode(const crush_rule_step &s, bufferlist &bl)
43 {
44 ::encode(s.op, bl);
45 ::encode(s.arg1, bl);
46 ::encode(s.arg2, bl);
47 }
48 inline static void decode(crush_rule_step &s, bufferlist::iterator &p)
49 {
50 ::decode(s.op, p);
51 ::decode(s.arg1, p);
52 ::decode(s.arg2, p);
53 }
54
55 using namespace std;
56 class CrushWrapper {
57 public:
58 // magic value used by OSDMap for a "default" fallback choose_args, used if
59 // the choose_arg_map passed to do_rule does not exist. if this also
60 // doesn't exist, fall back to canonical weights.
61 enum {
62 DEFAULT_CHOOSE_ARGS = -1
63 };
64
65 std::map<int32_t, string> type_map; /* bucket/device type names */
66 std::map<int32_t, string> name_map; /* bucket/device names */
67 std::map<int32_t, string> rule_name_map;
68
69 std::map<int32_t, int32_t> class_map; /* item id -> class id */
70 std::map<int32_t, string> class_name; /* class id -> class name */
71 std::map<string, int32_t> class_rname; /* class name -> class id */
72 std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */
73 std::map<int64_t, crush_choose_arg_map> choose_args;
74
75 private:
76 struct crush_map *crush;
77
78 bool have_uniform_rules = false;
79
80 /* reverse maps */
81 mutable bool have_rmaps;
82 mutable std::map<string, int> type_rmap, name_rmap, rule_name_rmap;
83 void build_rmaps() const {
84 if (have_rmaps) return;
85 build_rmap(type_map, type_rmap);
86 build_rmap(name_map, name_rmap);
87 build_rmap(rule_name_map, rule_name_rmap);
88 have_rmaps = true;
89 }
90 void build_rmap(const map<int, string> &f, std::map<string, int> &r) const {
91 r.clear();
92 for (std::map<int, string>::const_iterator p = f.begin(); p != f.end(); ++p)
93 r[p->second] = p->first;
94 }
95
96 public:
97 CrushWrapper(const CrushWrapper& other);
98 const CrushWrapper& operator=(const CrushWrapper& other);
99
100 CrushWrapper() : crush(0), have_rmaps(false) {
101 create();
102 }
103 ~CrushWrapper() {
104 if (crush)
105 crush_destroy(crush);
106 choose_args_clear();
107 }
108
109 crush_map *get_crush_map() { return crush; }
110
111 /* building */
112 void create() {
113 if (crush)
114 crush_destroy(crush);
115 crush = crush_create();
116 choose_args_clear();
117 assert(crush);
118 have_rmaps = false;
119
120 set_tunables_default();
121 }
122
123 /// true if any rule has a ruleset != the rule id
124 bool has_legacy_rulesets() const;
125
126 /// fix rules whose ruleid != ruleset
127 int renumber_rules_by_ruleset();
128
129 /// true if any ruleset has more than 1 rule
130 bool has_multirule_rulesets() const;
131
132 /// true if any buckets aren't straw2
133 bool has_non_straw2_buckets() const;
134
135 // tunables
136 void set_tunables_argonaut() {
137 crush->choose_local_tries = 2;
138 crush->choose_local_fallback_tries = 5;
139 crush->choose_total_tries = 19;
140 crush->chooseleaf_descend_once = 0;
141 crush->chooseleaf_vary_r = 0;
142 crush->chooseleaf_stable = 0;
143 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
144 }
145 void set_tunables_bobtail() {
146 crush->choose_local_tries = 0;
147 crush->choose_local_fallback_tries = 0;
148 crush->choose_total_tries = 50;
149 crush->chooseleaf_descend_once = 1;
150 crush->chooseleaf_vary_r = 0;
151 crush->chooseleaf_stable = 0;
152 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
153 }
154 void set_tunables_firefly() {
155 crush->choose_local_tries = 0;
156 crush->choose_local_fallback_tries = 0;
157 crush->choose_total_tries = 50;
158 crush->chooseleaf_descend_once = 1;
159 crush->chooseleaf_vary_r = 1;
160 crush->chooseleaf_stable = 0;
161 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
162 }
163 void set_tunables_hammer() {
164 crush->choose_local_tries = 0;
165 crush->choose_local_fallback_tries = 0;
166 crush->choose_total_tries = 50;
167 crush->chooseleaf_descend_once = 1;
168 crush->chooseleaf_vary_r = 1;
169 crush->chooseleaf_stable = 0;
170 crush->allowed_bucket_algs =
171 (1 << CRUSH_BUCKET_UNIFORM) |
172 (1 << CRUSH_BUCKET_LIST) |
173 (1 << CRUSH_BUCKET_STRAW) |
174 (1 << CRUSH_BUCKET_STRAW2);
175 }
176 void set_tunables_jewel() {
177 crush->choose_local_tries = 0;
178 crush->choose_local_fallback_tries = 0;
179 crush->choose_total_tries = 50;
180 crush->chooseleaf_descend_once = 1;
181 crush->chooseleaf_vary_r = 1;
182 crush->chooseleaf_stable = 1;
183 crush->allowed_bucket_algs =
184 (1 << CRUSH_BUCKET_UNIFORM) |
185 (1 << CRUSH_BUCKET_LIST) |
186 (1 << CRUSH_BUCKET_STRAW) |
187 (1 << CRUSH_BUCKET_STRAW2);
188 }
189
190 void set_tunables_legacy() {
191 set_tunables_argonaut();
192 crush->straw_calc_version = 0;
193 }
194 void set_tunables_optimal() {
195 set_tunables_jewel();
196 crush->straw_calc_version = 1;
197 }
198 void set_tunables_default() {
199 set_tunables_jewel();
200 crush->straw_calc_version = 1;
201 }
202
203 int get_choose_local_tries() const {
204 return crush->choose_local_tries;
205 }
206 void set_choose_local_tries(int n) {
207 crush->choose_local_tries = n;
208 }
209
210 int get_choose_local_fallback_tries() const {
211 return crush->choose_local_fallback_tries;
212 }
213 void set_choose_local_fallback_tries(int n) {
214 crush->choose_local_fallback_tries = n;
215 }
216
217 int get_choose_total_tries() const {
218 return crush->choose_total_tries;
219 }
220 void set_choose_total_tries(int n) {
221 crush->choose_total_tries = n;
222 }
223
224 int get_chooseleaf_descend_once() const {
225 return crush->chooseleaf_descend_once;
226 }
227 void set_chooseleaf_descend_once(int n) {
228 crush->chooseleaf_descend_once = !!n;
229 }
230
231 int get_chooseleaf_vary_r() const {
232 return crush->chooseleaf_vary_r;
233 }
234 void set_chooseleaf_vary_r(int n) {
235 crush->chooseleaf_vary_r = n;
236 }
237
238 int get_chooseleaf_stable() const {
239 return crush->chooseleaf_stable;
240 }
241 void set_chooseleaf_stable(int n) {
242 crush->chooseleaf_stable = n;
243 }
244
245 int get_straw_calc_version() const {
246 return crush->straw_calc_version;
247 }
248 void set_straw_calc_version(int n) {
249 crush->straw_calc_version = n;
250 }
251
252 unsigned get_allowed_bucket_algs() const {
253 return crush->allowed_bucket_algs;
254 }
255 void set_allowed_bucket_algs(unsigned n) {
256 crush->allowed_bucket_algs = n;
257 }
258
259 bool has_argonaut_tunables() const {
260 return
261 crush->choose_local_tries == 2 &&
262 crush->choose_local_fallback_tries == 5 &&
263 crush->choose_total_tries == 19 &&
264 crush->chooseleaf_descend_once == 0 &&
265 crush->chooseleaf_vary_r == 0 &&
266 crush->chooseleaf_stable == 0 &&
267 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
268 }
269 bool has_bobtail_tunables() const {
270 return
271 crush->choose_local_tries == 0 &&
272 crush->choose_local_fallback_tries == 0 &&
273 crush->choose_total_tries == 50 &&
274 crush->chooseleaf_descend_once == 1 &&
275 crush->chooseleaf_vary_r == 0 &&
276 crush->chooseleaf_stable == 0 &&
277 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
278 }
279 bool has_firefly_tunables() const {
280 return
281 crush->choose_local_tries == 0 &&
282 crush->choose_local_fallback_tries == 0 &&
283 crush->choose_total_tries == 50 &&
284 crush->chooseleaf_descend_once == 1 &&
285 crush->chooseleaf_vary_r == 1 &&
286 crush->chooseleaf_stable == 0 &&
287 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
288 }
289 bool has_hammer_tunables() const {
290 return
291 crush->choose_local_tries == 0 &&
292 crush->choose_local_fallback_tries == 0 &&
293 crush->choose_total_tries == 50 &&
294 crush->chooseleaf_descend_once == 1 &&
295 crush->chooseleaf_vary_r == 1 &&
296 crush->chooseleaf_stable == 0 &&
297 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
298 (1 << CRUSH_BUCKET_LIST) |
299 (1 << CRUSH_BUCKET_STRAW) |
300 (1 << CRUSH_BUCKET_STRAW2));
301 }
302 bool has_jewel_tunables() const {
303 return
304 crush->choose_local_tries == 0 &&
305 crush->choose_local_fallback_tries == 0 &&
306 crush->choose_total_tries == 50 &&
307 crush->chooseleaf_descend_once == 1 &&
308 crush->chooseleaf_vary_r == 1 &&
309 crush->chooseleaf_stable == 1 &&
310 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
311 (1 << CRUSH_BUCKET_LIST) |
312 (1 << CRUSH_BUCKET_STRAW) |
313 (1 << CRUSH_BUCKET_STRAW2));
314 }
315
316 bool has_optimal_tunables() const {
317 return has_jewel_tunables();
318 }
319 bool has_legacy_tunables() const {
320 return has_argonaut_tunables();
321 }
322
323 bool has_nondefault_tunables() const {
324 return
325 (crush->choose_local_tries != 2 ||
326 crush->choose_local_fallback_tries != 5 ||
327 crush->choose_total_tries != 19);
328 }
329 bool has_nondefault_tunables2() const {
330 return
331 crush->chooseleaf_descend_once != 0;
332 }
333 bool has_nondefault_tunables3() const {
334 return
335 crush->chooseleaf_vary_r != 0;
336 }
337 bool has_nondefault_tunables5() const {
338 return
339 crush->chooseleaf_stable != 0;
340 }
341
342 bool has_v2_rules() const;
343 bool has_v3_rules() const;
344 bool has_v4_buckets() const;
345 bool has_v5_rules() const;
346 bool has_choose_args() const; // any choose_args
347 bool has_incompat_choose_args() const; // choose_args that can't be made compat
348
349 bool is_v2_rule(unsigned ruleid) const;
350 bool is_v3_rule(unsigned ruleid) const;
351 bool is_v5_rule(unsigned ruleid) const;
352
353 string get_min_required_version() const {
354 if (has_v5_rules() || has_nondefault_tunables5())
355 return "jewel";
356 else if (has_v4_buckets())
357 return "hammer";
358 else if (has_nondefault_tunables3())
359 return "firefly";
360 else if (has_nondefault_tunables2() || has_nondefault_tunables())
361 return "bobtail";
362 else
363 return "argonaut";
364 }
365
366 // default bucket types
367 unsigned get_default_bucket_alg() const {
368 // in order of preference
369 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2))
370 return CRUSH_BUCKET_STRAW2;
371 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW))
372 return CRUSH_BUCKET_STRAW;
373 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE))
374 return CRUSH_BUCKET_TREE;
375 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST))
376 return CRUSH_BUCKET_LIST;
377 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM))
378 return CRUSH_BUCKET_UNIFORM;
379 return 0;
380 }
381
382 // bucket types
383 int get_num_type_names() const {
384 return type_map.size();
385 }
386 int get_max_type_id() const {
387 if (type_map.empty())
388 return 0;
389 return type_map.rbegin()->first;
390 }
391 int get_type_id(const string& name) const {
392 build_rmaps();
393 if (type_rmap.count(name))
394 return type_rmap[name];
395 return -1;
396 }
397 const char *get_type_name(int t) const {
398 std::map<int,string>::const_iterator p = type_map.find(t);
399 if (p != type_map.end())
400 return p->second.c_str();
401 return 0;
402 }
403 void set_type_name(int i, const string& name) {
404 type_map[i] = name;
405 if (have_rmaps)
406 type_rmap[name] = i;
407 }
408
409 // item/bucket names
410 bool name_exists(const string& name) const {
411 build_rmaps();
412 return name_rmap.count(name);
413 }
414 bool item_exists(int i) const {
415 return name_map.count(i);
416 }
417 int get_item_id(const string& name) const {
418 build_rmaps();
419 if (name_rmap.count(name))
420 return name_rmap[name];
421 return 0; /* hrm */
422 }
423 const char *get_item_name(int t) const {
424 std::map<int,string>::const_iterator p = name_map.find(t);
425 if (p != name_map.end())
426 return p->second.c_str();
427 return 0;
428 }
429 int set_item_name(int i, const string& name) {
430 if (!is_valid_crush_name(name))
431 return -EINVAL;
432 name_map[i] = name;
433 if (have_rmaps)
434 name_rmap[name] = i;
435 return 0;
436 }
437 void swap_names(int a, int b) {
438 string an = name_map[a];
439 string bn = name_map[b];
440 name_map[a] = bn;
441 name_map[b] = an;
442 if (have_rmaps) {
443 name_rmap[an] = b;
444 name_rmap[bn] = a;
445 }
446 }
447 int split_id_class(int i, int *idout, int *classout) const;
448
449 bool class_exists(const string& name) const {
450 return class_rname.count(name);
451 }
452 const char *get_class_name(int i) const {
453 auto p = class_name.find(i);
454 if (p != class_name.end())
455 return p->second.c_str();
456 return 0;
457 }
458 int get_class_id(const string& name) const {
459 auto p = class_rname.find(name);
460 if (p != class_rname.end())
461 return p->second;
462 else
463 return -EINVAL;
464 }
465 int remove_class_name(const string& name) {
466 auto p = class_rname.find(name);
467 if (p == class_rname.end())
468 return -ENOENT;
469 int class_id = p->second;
470 auto q = class_name.find(class_id);
471 if (q == class_name.end())
472 return -ENOENT;
473 class_rname.erase(name);
474 class_name.erase(class_id);
475 return 0;
476 }
477
478 int32_t _alloc_class_id() const;
479
480 int get_or_create_class_id(const string& name) {
481 int c = get_class_id(name);
482 if (c < 0) {
483 int i = _alloc_class_id();
484 class_name[i] = name;
485 class_rname[name] = i;
486 return i;
487 } else {
488 return c;
489 }
490 }
491
492 const char *get_item_class(int t) const {
493 std::map<int,int>::const_iterator p = class_map.find(t);
494 if (p == class_map.end())
495 return 0;
496 return get_class_name(p->second);
497 }
498 int set_item_class(int i, const string& name) {
499 if (!is_valid_crush_name(name))
500 return -EINVAL;
501 class_map[i] = get_or_create_class_id(name);
502 return 0;
503 }
504 int set_item_class(int i, int c) {
505 class_map[i] = c;
506 return c;
507 }
508 void get_devices_by_class(const string &name, set<int> *devices) const {
509 assert(devices);
510 devices->clear();
511 if (!class_exists(name)) {
512 return;
513 }
514 auto cid = get_class_id(name);
515 for (auto& p : class_map) {
516 if (p.first >= 0 && p.second == cid) {
517 devices->insert(p.first);
518 }
519 }
520 }
521 void class_remove_item(int i) {
522 auto it = class_map.find(i);
523 if (it == class_map.end()) {
524 return;
525 }
526 class_map.erase(it);
527 }
528 int can_rename_item(const string& srcname,
529 const string& dstname,
530 ostream *ss) const;
531 int rename_item(const string& srcname,
532 const string& dstname,
533 ostream *ss);
534 int can_rename_bucket(const string& srcname,
535 const string& dstname,
536 ostream *ss) const;
537 int rename_bucket(const string& srcname,
538 const string& dstname,
539 ostream *ss);
540
541 // rule names
542 int rename_rule(const string& srcname,
543 const string& dstname,
544 ostream *ss);
545 bool rule_exists(string name) const {
546 build_rmaps();
547 return rule_name_rmap.count(name);
548 }
549 int get_rule_id(string name) const {
550 build_rmaps();
551 if (rule_name_rmap.count(name))
552 return rule_name_rmap[name];
553 return -ENOENT;
554 }
555 const char *get_rule_name(int t) const {
556 std::map<int,string>::const_iterator p = rule_name_map.find(t);
557 if (p != rule_name_map.end())
558 return p->second.c_str();
559 return 0;
560 }
561 void set_rule_name(int i, const string& name) {
562 rule_name_map[i] = name;
563 if (have_rmaps)
564 rule_name_rmap[name] = i;
565 }
566 bool is_shadow_item(int id) const {
567 const char *name = get_item_name(id);
568 return name && !is_valid_crush_name(name);
569 }
570
571
572 /**
573 * find tree nodes referenced by rules by a 'take' command
574 *
575 * Note that these may not be parentless roots.
576 */
577 void find_takes(set<int>& roots) const;
578
579 /**
580 * find tree roots
581 *
582 * These are parentless nodes in the map.
583 */
584 void find_roots(set<int>& roots) const;
585
586
587 /**
588 * find tree roots that contain shadow (device class) items only
589 */
590 void find_shadow_roots(set<int>& roots) const {
591 set<int> all;
592 find_roots(all);
593 for (auto& p: all) {
594 if (is_shadow_item(p)) {
595 roots.insert(p);
596 }
597 }
598 }
599
600 /**
601 * find tree roots that are not shadow (device class) items
602 *
603 * These are parentless nodes in the map that are not shadow
604 * items for device classes.
605 */
606 void find_nonshadow_roots(set<int>& roots) const {
607 set<int> all;
608 find_roots(all);
609 for (auto& p: all) {
610 if (!is_shadow_item(p)) {
611 roots.insert(p);
612 }
613 }
614 }
615
616 /**
617 * see if an item is contained within a subtree
618 *
619 * @param root haystack
620 * @param item needle
621 * @return true if the item is located beneath the given node
622 */
623 bool subtree_contains(int root, int item) const;
624
625 private:
626 /**
627 * search for an item in any bucket
628 *
629 * @param i item
630 * @return true if present
631 */
632 bool _search_item_exists(int i) const;
633 public:
634
635 /**
636 * see if item is located where we think it is
637 *
638 * This verifies that the given item is located at a particular
639 * location in the hierarchy. However, that check is imprecise; we
640 * are actually verifying that the most specific location key/value
641 * is correct. For example, if loc specifies that rack=foo and
642 * host=bar, it will verify that host=bar is correct; any placement
643 * above that level in the hierarchy is ignored. This matches the
644 * semantics for insert_item().
645 *
646 * @param cct cct
647 * @param item item id
648 * @param loc location to check (map of type to bucket names)
649 * @param weight optional pointer to weight of item at that location
650 * @return true if item is at specified location
651 */
652 bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, int *iweight);
653 bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, float *weight) {
654 int iweight;
655 bool ret = check_item_loc(cct, item, loc, &iweight);
656 if (weight)
657 *weight = (float)iweight / (float)0x10000;
658 return ret;
659 }
660
661
662 /**
663 * returns the (type, name) of the parent bucket of id
664 *
665 * FIXME: ambiguous for items that occur multiple times in the map
666 */
667 pair<string,string> get_immediate_parent(int id, int *ret = NULL);
668
669 int get_immediate_parent_id(int id, int *parent) const;
670
671 /**
672 * return ancestor of the given type, or 0 if none
673 * (parent is always a bucket and thus <0)
674 */
675 int get_parent_of_type(int id, int type) const;
676
677 /**
678 * get the fully qualified location of a device by successively finding
679 * parents beginning at ID and ending at highest type number specified in
680 * the CRUSH map which assumes that if device foo is under device bar, the
681 * type_id of foo < bar where type_id is the integer specified in the CRUSH map
682 *
683 * returns the location in the form of (type=foo) where type is a type of bucket
684 * specified in the CRUSH map and foo is a name specified in the CRUSH map
685 */
686 map<string, string> get_full_location(int id);
687
688 /*
689 * identical to get_full_location(int id) although it returns the type/name
690 * pairs in the order they occur in the hierarchy.
691 *
692 * returns -ENOENT if id is not found.
693 */
694 int get_full_location_ordered(int id, vector<pair<string, string> >& path);
695
696 /*
697 * identical to get_full_location_ordered(int id, vector<pair<string, string> >& path),
698 * although it returns a concatenated string with the type/name pairs in descending
699 * hierarchical order with format key1=val1,key2=val2.
700 *
701 * returns the location in descending hierarchy as a string.
702 */
703 string get_full_location_ordered_string(int id);
704
705 /**
706 * returns (type_id, type) of all parent buckets between id and
707 * default, can be used to check for anomalous CRUSH maps
708 */
709 map<int, string> get_parent_hierarchy(int id);
710
711 /**
712 * enumerate immediate children of given node
713 *
714 * @param id parent bucket or device id
715 * @return number of items, or error
716 */
717 int get_children(int id, list<int> *children);
718
719 /**
720 * enumerate leaves(devices) of given node
721 *
722 * @param name parent bucket name
723 * @return 0 on success or a negative errno on error.
724 */
725 int get_leaves(const string &name, set<int> *leaves);
726 int _get_leaves(int id, list<int> *leaves); // worker
727
728 /**
729 * insert an item into the map at a specific position
730 *
731 * Add an item at a specific location in the hierarchy.
732 * Specifically, we look for the most specific location constraint
733 * for which a bucket already exists, and then create intervening
734 * buckets beneath that in order to place the item.
735 *
736 * Note that any location specifiers *above* the most specific match
737 * are ignored. For example, if we specify that osd.12 goes in
738 * host=foo, rack=bar, and row=baz, and rack=bar is the most
739 * specific match, we will create host=foo beneath that point and
740 * put osd.12 inside it. However, we will not verify that rack=bar
741 * is beneath row=baz or move it.
742 *
743 * In short, we will build out a hierarchy, and move leaves around,
744 * but not adjust the hierarchy's internal structure. Yet.
745 *
746 * If the item is already present in the map, we will return EEXIST.
747 * If the location key/value pairs are nonsensical
748 * (rack=nameofdevice), or the location specifiers do not attach us
749 * to any existing part of the hierarchy, we will return EINVAL.
750 *
751 * @param cct cct
752 * @param id item id
753 * @param weight item weight
754 * @param name item name
755 * @param loc location (map of type to bucket names)
756 * @return 0 for success, negative on error
757 */
758 int insert_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc);
759
760 /**
761 * move a bucket in the hierarchy to the given location
762 *
763 * This has the same location and ancestor creation behavior as
764 * insert_item(), but will relocate the specified existing bucket.
765 *
766 * @param cct cct
767 * @param id bucket id
768 * @param loc location (map of type to bucket names)
769 * @return 0 for success, negative on error
770 */
771 int move_bucket(CephContext *cct, int id, const map<string,string>& loc);
772
773 /**
774 * swap bucket contents of two buckets without touching bucket ids
775 *
776 * @param cct cct
777 * @param src bucket a
778 * @param dst bucket b
779 * @return 0 for success, negative on error
780 */
781 int swap_bucket(CephContext *cct, int src, int dst);
782
783 /**
784 * add a link to an existing bucket in the hierarchy to the new location
785 *
786 * This has the same location and ancestor creation behavior as
787 * insert_item(), but will add a new link to the specified existing
788 * bucket.
789 *
790 * @param cct cct
791 * @param id bucket id
792 * @param loc location (map of type to bucket names)
793 * @return 0 for success, negative on error
794 */
795 int link_bucket(CephContext *cct, int id, const map<string,string>& loc);
796
797 /**
798 * add or update an item's position in the map
799 *
800 * This is analogous to insert_item, except we will move an item if
801 * it is already present.
802 *
803 * @param cct cct
804 * @param id item id
805 * @param weight item weight
806 * @param name item name
807 * @param loc location (map of type to bucket names)
808 * @return 0 for no change, 1 for successful change, negative on error
809 */
810 int update_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc);
811
812 /**
813 * create or move an item, but do not adjust its weight if it already exists
814 *
815 * @param cct cct
816 * @param item item id
817 * @param weight initial item weight (if we need to create it)
818 * @param name item name
819 * @param loc location (map of type to bucket names)
820 * @return 0 for no change, 1 for successful change, negative on error
821 */
822 int create_or_move_item(CephContext *cct, int item, float weight, string name,
823 const map<string,string>& loc);
824
825 /**
826 * remove all instances of an item from the map
827 *
828 * @param cct cct
829 * @param id item id to remove
830 * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty)
831 * @return 0 on success, negative on error
832 */
833 int remove_item(CephContext *cct, int id, bool unlink_only);
834
835 /**
836 * recursively remove buckets starting at item and stop removing
837 * when a bucket is in use.
838 *
839 * @param item id to remove
840 * @return 0 on success, negative on error
841 */
842 int remove_root(int item);
843
844 /**
845 * remove all instances of an item nested beneath a certain point from the map
846 *
847 * @param cct cct
848 * @param id item id to remove
849 * @param ancestor ancestor item id under which to search for id
850 * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty)
851 * @return 0 on success, negative on error
852 */
853 private:
854 bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only);
855 int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
856 bool _bucket_is_in_use(int id);
857 public:
858 int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
859
860 /**
861 * calculate the locality/distance from a given id to a crush location map
862 *
863 * Specifically, we look for the lowest-valued type for which the
864 * location of id matches that described in loc.
865 *
866 * @param cct cct
867 * @param id the existing id in the map
868 * @param loc a set of key=value pairs describing a location in the hierarchy
869 */
870 int get_common_ancestor_distance(CephContext *cct, int id,
871 const std::multimap<string,string>& loc);
872
873 /**
874 * parse a set of key/value pairs out of a string vector
875 *
876 * These are used to describe a location in the CRUSH hierarchy.
877 *
878 * @param args list of strings (each key= or key=value)
879 * @param ploc pointer to a resulting location map or multimap
880 */
881 static int parse_loc_map(const std::vector<string>& args,
882 std::map<string,string> *ploc);
883 static int parse_loc_multimap(const std::vector<string>& args,
884 std::multimap<string,string> *ploc);
885
886 /**
887 * get an item's weight
888 *
889 * Will return the weight for the first instance it finds.
890 *
891 * @param id item id to check
892 * @return weight of item
893 */
894 int get_item_weight(int id) const;
895 float get_item_weightf(int id) const {
896 return (float)get_item_weight(id) / (float)0x10000;
897 }
898 int get_item_weight_in_loc(int id, const map<string,string> &loc);
899 float get_item_weightf_in_loc(int id, const map<string,string> &loc) {
900 return (float)get_item_weight_in_loc(id, loc) / (float)0x10000;
901 }
902
/// Check that a floating-point weight can be converted to the 16.16
/// fixed-point int used internally, i.e. that (int)(weight * 0x10000) is
/// representable.
///
/// The test is carried out entirely in floating point.  Converting a
/// negative, NaN or too-large float to an integer type is undefined
/// behaviour, so no such conversion is evaluated here.
///
/// @param weight weight to validate
/// @return 0 if the scaled weight truncates to a value in [0, INT_MAX];
///         -EOVERFLOW otherwise (too large, infinite, NaN, or <= -1 after
///         scaling)
int validate_weightf(float weight) {
  // multiplying by a power of two is exact, so this equals the float product
  const double scaled = static_cast<double>(weight) * 0x10000;
  const double upper =
    static_cast<double>(std::numeric_limits<int>::max()) + 1.0;
  // Values in (-1, 0) truncate to 0 and are accepted.  The condition is a
  // negated conjunction so that NaN (which fails every comparison) is
  // rejected too.
  if (!(scaled > -1.0 && scaled < upper)) {
    return -EOVERFLOW;
  }
  return 0;
}
910 int adjust_item_weight(CephContext *cct, int id, int weight);
911 int adjust_item_weightf(CephContext *cct, int id, float weight) {
912 int r = validate_weightf(weight);
913 if (r < 0) {
914 return r;
915 }
916 return adjust_item_weight(cct, id, (int)(weight * (float)0x10000));
917 }
918 int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc);
919 int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map<string,string>& loc) {
920 int r = validate_weightf(weight);
921 if (r < 0) {
922 return r;
923 }
924 return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc);
925 }
926 void reweight(CephContext *cct);
927
928 int adjust_subtree_weight(CephContext *cct, int id, int weight);
929 int adjust_subtree_weightf(CephContext *cct, int id, float weight) {
930 int r = validate_weightf(weight);
931 if (r < 0) {
932 return r;
933 }
934 return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000));
935 }
936
937 /// check if item id is present in the map hierarchy
938 bool check_item_present(int id) const;
939
940
941 /*** devices ***/
942 int get_max_devices() const {
943 if (!crush) return 0;
944 return crush->max_devices;
945 }
946
947
948 /*** rules ***/
949 private:
950 crush_rule *get_rule(unsigned ruleno) const {
951 if (!crush) return (crush_rule *)(-ENOENT);
952 if (ruleno >= crush->max_rules)
953 return 0;
954 return crush->rules[ruleno];
955 }
956 crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const {
957 crush_rule *n = get_rule(ruleno);
958 if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL);
959 if (step >= n->len) return (crush_rule_step *)(-EINVAL);
960 return &n->steps[step];
961 }
962
963 public:
964 /* accessors */
965 int get_max_rules() const {
966 if (!crush) return 0;
967 return crush->max_rules;
968 }
969 bool rule_exists(unsigned ruleno) const {
970 if (!crush) return false;
971 if (ruleno < crush->max_rules &&
972 crush->rules[ruleno] != NULL)
973 return true;
974 return false;
975 }
976 int get_rule_len(unsigned ruleno) const {
977 crush_rule *r = get_rule(ruleno);
978 if (IS_ERR(r)) return PTR_ERR(r);
979 return r->len;
980 }
981 int get_rule_mask_ruleset(unsigned ruleno) const {
982 crush_rule *r = get_rule(ruleno);
983 if (IS_ERR(r)) return -1;
984 return r->mask.ruleset;
985 }
986 int get_rule_mask_type(unsigned ruleno) const {
987 crush_rule *r = get_rule(ruleno);
988 if (IS_ERR(r)) return -1;
989 return r->mask.type;
990 }
991 int get_rule_mask_min_size(unsigned ruleno) const {
992 crush_rule *r = get_rule(ruleno);
993 if (IS_ERR(r)) return -1;
994 return r->mask.min_size;
995 }
996 int get_rule_mask_max_size(unsigned ruleno) const {
997 crush_rule *r = get_rule(ruleno);
998 if (IS_ERR(r)) return -1;
999 return r->mask.max_size;
1000 }
1001 int get_rule_op(unsigned ruleno, unsigned step) const {
1002 crush_rule_step *s = get_rule_step(ruleno, step);
1003 if (IS_ERR(s)) return PTR_ERR(s);
1004 return s->op;
1005 }
1006 int get_rule_arg1(unsigned ruleno, unsigned step) const {
1007 crush_rule_step *s = get_rule_step(ruleno, step);
1008 if (IS_ERR(s)) return PTR_ERR(s);
1009 return s->arg1;
1010 }
1011 int get_rule_arg2(unsigned ruleno, unsigned step) const {
1012 crush_rule_step *s = get_rule_step(ruleno, step);
1013 if (IS_ERR(s)) return PTR_ERR(s);
1014 return s->arg2;
1015 }
1016
1017 /**
1018 * calculate a map of osds to weights for a given rule
1019 *
1020 * Generate a map of which OSDs get how much relative weight for a
1021 * given rule.
1022 *
1023 * @param ruleno [in] rule id
1024 * @param pmap [out] map of osd to weight
1025 * @return 0 for success, or negative error code
1026 */
1027 int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap);
1028
1029 /* modifiers */
1030
1031 int add_rule(int ruleno, int len, int type, int minsize, int maxsize) {
1032 if (!crush) return -ENOENT;
1033 crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize);
1034 assert(n);
1035 ruleno = crush_add_rule(crush, n, ruleno);
1036 return ruleno;
1037 }
1038 int set_rule_mask_max_size(unsigned ruleno, int max_size) {
1039 crush_rule *r = get_rule(ruleno);
1040 if (IS_ERR(r)) return -1;
1041 return r->mask.max_size = max_size;
1042 }
1043 int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) {
1044 if (!crush) return -ENOENT;
1045 crush_rule *n = get_rule(ruleno);
1046 if (!n) return -1;
1047 crush_rule_set_step(n, step, op, arg1, arg2);
1048 return 0;
1049 }
1050 int set_rule_step_take(unsigned ruleno, unsigned step, int val) {
1051 return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0);
1052 }
1053 int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) {
1054 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0);
1055 }
1056 int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) {
1057 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0);
1058 }
1059 int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) {
1060 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0);
1061 }
1062 int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) {
1063 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0);
1064 }
1065 int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) {
1066 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0);
1067 }
1068 int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) {
1069 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0);
1070 }
1071 int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) {
1072 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type);
1073 }
1074 int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) {
1075 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type);
1076 }
1077 int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) {
1078 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type);
1079 }
1080 int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) {
1081 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type);
1082 }
1083 int set_rule_step_emit(unsigned ruleno, unsigned step) {
1084 return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
1085 }
1086
1087 int add_simple_rule(
1088 string name, string root_name, string failure_domain_type,
1089 string device_class,
1090 string mode, int rule_type, ostream *err = 0);
1091
1092 /**
1093 * @param rno rule[set] id to use, -1 to pick the lowest available
1094 */
1095 int add_simple_rule_at(
1096 string name, string root_name,
1097 string failure_domain_type, string device_class, string mode,
1098 int rule_type, int rno, ostream *err = 0);
1099
1100 int remove_rule(int ruleno);
1101
1102
1103 /** buckets **/
1104 const crush_bucket *get_bucket(int id) const {
1105 if (!crush)
1106 return (crush_bucket *)(-EINVAL);
1107 unsigned int pos = (unsigned int)(-1 - id);
1108 unsigned int max_buckets = crush->max_buckets;
1109 if (pos >= max_buckets)
1110 return (crush_bucket *)(-ENOENT);
1111 crush_bucket *ret = crush->buckets[pos];
1112 if (ret == NULL)
1113 return (crush_bucket *)(-ENOENT);
1114 return ret;
1115 }
1116 private:
1117 crush_bucket *get_bucket(int id) {
1118 if (!crush)
1119 return (crush_bucket *)(-EINVAL);
1120 unsigned int pos = (unsigned int)(-1 - id);
1121 unsigned int max_buckets = crush->max_buckets;
1122 if (pos >= max_buckets)
1123 return (crush_bucket *)(-ENOENT);
1124 crush_bucket *ret = crush->buckets[pos];
1125 if (ret == NULL)
1126 return (crush_bucket *)(-ENOENT);
1127 return ret;
1128 }
1129 /**
1130 * detach a bucket from its parent and adjust the parent weight
1131 *
1132 * returns the weight of the detached bucket
1133 **/
1134 int detach_bucket(CephContext *cct, int item);
1135
1136 public:
1137 int get_max_buckets() const {
1138 if (!crush) return -EINVAL;
1139 return crush->max_buckets;
1140 }
1141 int get_next_bucket_id() const {
1142 if (!crush) return -EINVAL;
1143 return crush_get_next_bucket_id(crush);
1144 }
1145 bool bucket_exists(int id) const {
1146 const crush_bucket *b = get_bucket(id);
1147 if (IS_ERR(b))
1148 return false;
1149 return true;
1150 }
1151 int get_bucket_weight(int id) const {
1152 const crush_bucket *b = get_bucket(id);
1153 if (IS_ERR(b)) return PTR_ERR(b);
1154 return b->weight;
1155 }
1156 float get_bucket_weightf(int id) const {
1157 const crush_bucket *b = get_bucket(id);
1158 if (IS_ERR(b)) return 0;
1159 return b->weight / (float)0x10000;
1160 }
1161 int get_bucket_type(int id) const {
1162 const crush_bucket *b = get_bucket(id);
1163 if (IS_ERR(b)) return PTR_ERR(b);
1164 return b->type;
1165 }
1166 int get_bucket_alg(int id) const {
1167 const crush_bucket *b = get_bucket(id);
1168 if (IS_ERR(b)) return PTR_ERR(b);
1169 return b->alg;
1170 }
1171 int get_bucket_hash(int id) const {
1172 const crush_bucket *b = get_bucket(id);
1173 if (IS_ERR(b)) return PTR_ERR(b);
1174 return b->hash;
1175 }
1176 int get_bucket_size(int id) const {
1177 const crush_bucket *b = get_bucket(id);
1178 if (IS_ERR(b)) return PTR_ERR(b);
1179 return b->size;
1180 }
1181 int get_bucket_item(int id, int pos) const {
1182 const crush_bucket *b = get_bucket(id);
1183 if (IS_ERR(b)) return PTR_ERR(b);
1184 if ((__u32)pos >= b->size)
1185 return PTR_ERR(b);
1186 return b->items[pos];
1187 }
1188 int get_bucket_item_weight(int id, int pos) const {
1189 const crush_bucket *b = get_bucket(id);
1190 if (IS_ERR(b)) return PTR_ERR(b);
1191 return crush_get_bucket_item_weight(b, pos);
1192 }
1193 float get_bucket_item_weightf(int id, int pos) const {
1194 const crush_bucket *b = get_bucket(id);
1195 if (IS_ERR(b)) return 0;
1196 return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000;
1197 }
1198
1199 /* modifiers */
1200 int add_bucket(int bucketno, int alg, int hash, int type, int size,
1201 int *items, int *weights, int *idout);
1202 int bucket_add_item(crush_bucket *bucket, int item, int weight);
1203 int bucket_remove_item(struct crush_bucket *bucket, int item);
1204 int bucket_adjust_item_weight(CephContext *cct, struct crush_bucket *bucket, int item, int weight);
1205
1206 void finalize() {
1207 assert(crush);
1208 crush_finalize(crush);
1209 have_uniform_rules = !has_legacy_rulesets();
1210 }
1211
1212 int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
1213 int remove_device_class(CephContext *cct, int id, ostream *ss);
1214 int device_class_clone(
1215 int original, int device_class,
1216 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
1217 const std::set<int32_t>& used_ids,
1218 int *clone,
1219 map<int,map<int,vector<int>>> *cmap_item_weight);
1220 int rename_class(const string& srcname, const string& dstname);
1221 int populate_classes(
1222 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
1223 int get_rules_by_class(const string &class_name, set<int> *rules);
1224 bool _class_is_dead(int class_id);
1225 void cleanup_dead_classes();
1226 int rebuild_roots_with_classes();
1227 /* remove unused roots generated for class devices */
1228 int trim_roots_with_class();
1229
1230 void start_choose_profile() {
1231 free(crush->choose_tries);
1232 /*
1233 * the original choose_total_tries value was off by one (it
1234 * counted "retries" and not "tries"). add one to alloc.
1235 */
1236 crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries),
1237 (crush->choose_total_tries + 1));
1238 memset(crush->choose_tries, 0,
1239 sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
1240 }
1241 void stop_choose_profile() {
1242 free(crush->choose_tries);
1243 crush->choose_tries = 0;
1244 }
1245
1246 int get_choose_profile(__u32 **vec) {
1247 if (crush->choose_tries) {
1248 *vec = crush->choose_tries;
1249 return crush->choose_total_tries;
1250 }
1251 return 0;
1252 }
1253
1254
1255 void set_max_devices(int m) {
1256 crush->max_devices = m;
1257 }
1258
1259 int find_rule(int ruleset, int type, int size) const {
1260 if (!crush) return -1;
1261 if (have_uniform_rules &&
1262 ruleset < (int)crush->max_rules &&
1263 crush->rules[ruleset] &&
1264 crush->rules[ruleset]->mask.type == type &&
1265 crush->rules[ruleset]->mask.min_size <= size &&
1266 crush->rules[ruleset]->mask.max_size >= size) {
1267 return ruleset;
1268 }
1269 return crush_find_rule(crush, ruleset, type, size);
1270 }
1271
1272 bool ruleset_exists(const int ruleset) const {
1273 for (size_t i = 0; i < crush->max_rules; ++i) {
1274 if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) {
1275 return true;
1276 }
1277 }
1278
1279 return false;
1280 }
1281
1282 /**
1283 * Return the lowest numbered ruleset of type `type`
1284 *
1285 * @returns a ruleset ID, or -1 if no matching rulesets found.
1286 */
1287 int find_first_ruleset(int type) const {
1288 int result = -1;
1289
1290 for (size_t i = 0; i < crush->max_rules; ++i) {
1291 if (crush->rules[i]
1292 && crush->rules[i]->mask.type == type
1293 && (crush->rules[i]->mask.ruleset < result || result == -1)) {
1294 result = crush->rules[i]->mask.ruleset;
1295 }
1296 }
1297
1298 return result;
1299 }
1300
1301 bool have_choose_args(int64_t choose_args_index) const {
1302 return choose_args.count(choose_args_index);
1303 }
1304
1305 crush_choose_arg_map choose_args_get_with_fallback(
1306 int64_t choose_args_index) const {
1307 auto i = choose_args.find(choose_args_index);
1308 if (i == choose_args.end()) {
1309 i = choose_args.find(DEFAULT_CHOOSE_ARGS);
1310 }
1311 if (i == choose_args.end()) {
1312 crush_choose_arg_map arg_map;
1313 arg_map.args = NULL;
1314 arg_map.size = 0;
1315 return arg_map;
1316 } else {
1317 return i->second;
1318 }
1319 }
1320 crush_choose_arg_map choose_args_get(int64_t choose_args_index) const {
1321 auto i = choose_args.find(choose_args_index);
1322 if (i == choose_args.end()) {
1323 crush_choose_arg_map arg_map;
1324 arg_map.args = NULL;
1325 arg_map.size = 0;
1326 return arg_map;
1327 } else {
1328 return i->second;
1329 }
1330 }
1331
1332 void destroy_choose_args(crush_choose_arg_map arg_map) {
1333 for (__u32 i = 0; i < arg_map.size; i++) {
1334 crush_choose_arg *arg = &arg_map.args[i];
1335 for (__u32 j = 0; j < arg->weight_set_size; j++) {
1336 crush_weight_set *weight_set = &arg->weight_set[j];
1337 free(weight_set->weights);
1338 }
1339 if (arg->weight_set)
1340 free(arg->weight_set);
1341 if (arg->ids)
1342 free(arg->ids);
1343 }
1344 free(arg_map.args);
1345 }
1346
1347 void create_choose_args(int64_t id, int positions) {
1348 if (choose_args.count(id))
1349 return;
1350 assert(positions);
1351 auto &cmap = choose_args[id];
1352 cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg),
1353 crush->max_buckets);
1354 cmap.size = crush->max_buckets;
1355 for (int bidx=0; bidx < crush->max_buckets; ++bidx) {
1356 crush_bucket *b = crush->buckets[bidx];
1357 auto &carg = cmap.args[bidx];
1358 carg.ids = NULL;
1359 carg.ids_size = 0;
1360 if (b && b->alg == CRUSH_BUCKET_STRAW2) {
1361 crush_bucket_straw2 *sb = (crush_bucket_straw2*)b;
1362 carg.weight_set_size = positions;
1363 carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set),
1364 carg.weight_set_size);
1365 // initialize with canonical weights
1366 for (int pos = 0; pos < positions; ++pos) {
1367 carg.weight_set[pos].size = b->size;
1368 carg.weight_set[pos].weights = (__u32*)calloc(4, b->size);
1369 for (unsigned i = 0; i < b->size; ++i) {
1370 carg.weight_set[pos].weights[i] = sb->item_weights[i];
1371 }
1372 }
1373 } else {
1374 carg.weight_set = NULL;
1375 carg.weight_set_size = 0;
1376 }
1377 }
1378 }
1379
1380 void rm_choose_args(int64_t id) {
1381 auto p = choose_args.find(id);
1382 if (p != choose_args.end()) {
1383 destroy_choose_args(p->second);
1384 choose_args.erase(p);
1385 }
1386 }
1387
1388 void choose_args_clear() {
1389 for (auto w : choose_args)
1390 destroy_choose_args(w.second);
1391 choose_args.clear();
1392 }
1393
1394 // adjust choose_args_map weight, preserving the hierarchical summation
1395 // property. used by callers optimizing layouts by tweaking weights.
1396 int _choose_args_adjust_item_weight_in_bucket(
1397 CephContext *cct,
1398 crush_choose_arg_map cmap,
1399 int bucketid,
1400 int id,
1401 const vector<int>& weight,
1402 ostream *ss);
1403 int choose_args_adjust_item_weight(
1404 CephContext *cct,
1405 crush_choose_arg_map cmap,
1406 int id, const vector<int>& weight,
1407 ostream *ss);
1408 int choose_args_adjust_item_weightf(
1409 CephContext *cct,
1410 crush_choose_arg_map cmap,
1411 int id, const vector<double>& weightf,
1412 ostream *ss) {
1413 vector<int> weight(weightf.size());
1414 for (unsigned i = 0; i < weightf.size(); ++i) {
1415 weight[i] = (int)(weightf[i] * (float)0x10000);
1416 }
1417 return choose_args_adjust_item_weight(cct, cmap, id, weight, ss);
1418 }
1419
1420 int get_choose_args_positions(crush_choose_arg_map cmap) {
1421 // infer positions from other buckets
1422 for (unsigned j = 0; j < cmap.size; ++j) {
1423 if (cmap.args[j].weight_set_size) {
1424 return cmap.args[j].weight_set_size;
1425 }
1426 }
1427 return 1;
1428 }
1429
1430 template<typename WeightVector>
1431 void do_rule(int rule, int x, vector<int>& out, int maxout,
1432 const WeightVector& weight,
1433 uint64_t choose_args_index) const {
1434 int rawout[maxout];
1435 char work[crush_work_size(crush, maxout)];
1436 crush_init_workspace(crush, work);
1437 crush_choose_arg_map arg_map = choose_args_get_with_fallback(
1438 choose_args_index);
1439 int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0],
1440 weight.size(), work, arg_map.args);
1441 if (numrep < 0)
1442 numrep = 0;
1443 out.resize(numrep);
1444 for (int i=0; i<numrep; i++)
1445 out[i] = rawout[i];
1446 }
1447
1448 int _choose_type_stack(
1449 CephContext *cct,
1450 const vector<pair<int,int>>& stack,
1451 const set<int>& overfull,
1452 const vector<int>& underfull,
1453 const vector<int>& orig,
1454 vector<int>::const_iterator& i,
1455 set<int>& used,
1456 vector<int> *pw) const;
1457
1458 int try_remap_rule(
1459 CephContext *cct,
1460 int rule,
1461 int maxout,
1462 const set<int>& overfull,
1463 const vector<int>& underfull,
1464 const vector<int>& orig,
1465 vector<int> *out) const;
1466
1467 bool check_crush_rule(int ruleset, int type, int size, ostream& ss) {
1468 assert(crush);
1469
1470 __u32 i;
1471 for (i = 0; i < crush->max_rules; i++) {
1472 if (crush->rules[i] &&
1473 crush->rules[i]->mask.ruleset == ruleset &&
1474 crush->rules[i]->mask.type == type) {
1475
1476 if (crush->rules[i]->mask.min_size <= size &&
1477 crush->rules[i]->mask.max_size >= size) {
1478 return true;
1479 } else if (size < crush->rules[i]->mask.min_size) {
1480 ss << "pool size is smaller than the crush rule min size";
1481 return false;
1482 } else {
1483 ss << "pool size is bigger than the crush rule max size";
1484 return false;
1485 }
1486 }
1487 }
1488
1489 return false;
1490 }
1491
1492 void encode(bufferlist &bl, uint64_t features) const;
1493 void decode(bufferlist::iterator &blp);
1494 void decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp);
1495 void dump(Formatter *f) const;
1496 void dump_rules(Formatter *f) const;
1497 void dump_rule(int ruleset, Formatter *f) const;
1498 void dump_tunables(Formatter *f) const;
1499 void dump_choose_args(Formatter *f) const;
1500 void list_rules(Formatter *f) const;
1501 void list_rules(ostream *ss) const;
1502 void dump_tree(ostream *out,
1503 Formatter *f,
1504 const CrushTreeDumper::name_map_t& ws,
1505 bool show_shadow = false) const;
1506 void dump_tree(ostream *out, Formatter *f) {
1507 dump_tree(out, f, CrushTreeDumper::name_map_t());
1508 }
1509 void dump_tree(Formatter *f,
1510 const CrushTreeDumper::name_map_t& ws) const;
1511 static void generate_test_instances(list<CrushWrapper*>& o);
1512
1513 int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);
1514
1515 static bool is_valid_crush_name(const string& s);
1516 static bool is_valid_crush_loc(CephContext *cct,
1517 const map<string,string>& loc);
1518 };
1519 WRITE_CLASS_ENCODER_FEATURES(CrushWrapper)
1520
1521 #endif