]> git.proxmox.com Git - ceph.git/blame - ceph/src/crush/CrushWrapper.h
update sources to 12.2.7
[ceph.git] / ceph / src / crush / CrushWrapper.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#ifndef CEPH_CRUSH_WRAPPER_H
5#define CEPH_CRUSH_WRAPPER_H
6
7#include <stdlib.h>
8#include <map>
9#include <set>
10#include <string>
11
12#include <iosfwd>
13
14#include "include/types.h"
15
16extern "C" {
17#include "crush.h"
18#include "hash.h"
19#include "mapper.h"
20#include "builder.h"
21}
22
31f18b77 23#include "include/assert.h"
7c673cae
FG
24#include "include/err.h"
25#include "include/encoding.h"
c07f9fc5 26#include "include/mempool.h"
7c673cae
FG
27
28#include "common/Mutex.h"
29
7c673cae
FG
30#define BUG_ON(x) assert(!(x))
31
32namespace ceph {
33 class Formatter;
34}
35
c07f9fc5
FG
36namespace CrushTreeDumper {
37 typedef mempool::osdmap::map<int64_t,string> name_map_t;
38}
39
7c673cae
FG
40WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's
41
42inline static void encode(const crush_rule_step &s, bufferlist &bl)
43{
44 ::encode(s.op, bl);
45 ::encode(s.arg1, bl);
46 ::encode(s.arg2, bl);
47}
48inline static void decode(crush_rule_step &s, bufferlist::iterator &p)
49{
50 ::decode(s.op, p);
51 ::decode(s.arg1, p);
52 ::decode(s.arg2, p);
53}
54
55using namespace std;
56class CrushWrapper {
57public:
c07f9fc5
FG
58 // magic value used by OSDMap for a "default" fallback choose_args, used if
59 // the choose_arg_map passed to do_rule does not exist. if this also
60 // doesn't exist, fall back to canonical weights.
61 enum {
62 DEFAULT_CHOOSE_ARGS = -1
63 };
64
7c673cae
FG
65 std::map<int32_t, string> type_map; /* bucket/device type names */
66 std::map<int32_t, string> name_map; /* bucket/device names */
67 std::map<int32_t, string> rule_name_map;
d2e6a577 68
7c673cae
FG
69 std::map<int32_t, int32_t> class_map; /* item id -> class id */
70 std::map<int32_t, string> class_name; /* class id -> class name */
71 std::map<string, int32_t> class_rname; /* class name -> class id */
72 std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */
c07f9fc5 73 std::map<int64_t, crush_choose_arg_map> choose_args;
7c673cae
FG
74
75private:
28e407b8 76 struct crush_map *crush = nullptr;
31f18b77
FG
77
78 bool have_uniform_rules = false;
79
7c673cae 80 /* reverse maps */
28e407b8 81 mutable bool have_rmaps = false;
7c673cae
FG
82 mutable std::map<string, int> type_rmap, name_rmap, rule_name_rmap;
83 void build_rmaps() const {
84 if (have_rmaps) return;
85 build_rmap(type_map, type_rmap);
86 build_rmap(name_map, name_rmap);
87 build_rmap(rule_name_map, rule_name_rmap);
88 have_rmaps = true;
89 }
90 void build_rmap(const map<int, string> &f, std::map<string, int> &r) const {
91 r.clear();
92 for (std::map<int, string>::const_iterator p = f.begin(); p != f.end(); ++p)
93 r[p->second] = p->first;
94 }
95
96public:
97 CrushWrapper(const CrushWrapper& other);
98 const CrushWrapper& operator=(const CrushWrapper& other);
99
28e407b8 100 CrushWrapper() {
7c673cae
FG
101 create();
102 }
103 ~CrushWrapper() {
104 if (crush)
105 crush_destroy(crush);
106 choose_args_clear();
107 }
108
109 crush_map *get_crush_map() { return crush; }
110
111 /* building */
112 void create() {
113 if (crush)
114 crush_destroy(crush);
115 crush = crush_create();
116 choose_args_clear();
117 assert(crush);
118 have_rmaps = false;
119
120 set_tunables_default();
121 }
122
3efd9988
FG
123 /**
124 * true if any rule has a rule id != its position in the array
125 *
126 * These indicate "ruleset" IDs that were created by older versions
127 * of Ceph. They are cleaned up in renumber_rules so that eventually
128 * we can remove the code for handling them.
129 */
130 bool has_legacy_rule_ids() const;
31f18b77 131
3efd9988
FG
132 /**
133 * fix rules whose ruleid != ruleset
134 *
135 * These rules were created in older versions of Ceph. The concept
136 * of a ruleset no longer exists.
137 *
138 * Return a map of old ID -> new ID. Caller must update OSDMap
139 * to use new IDs.
140 */
141 std::map<int, int> renumber_rules();
31f18b77 142
c07f9fc5
FG
143 /// true if any bucket is not a straw2 bucket
144 bool has_non_straw2_buckets() const;
145
7c673cae
FG
146 // tunables
147 void set_tunables_argonaut() {
148 crush->choose_local_tries = 2;
149 crush->choose_local_fallback_tries = 5;
150 crush->choose_total_tries = 19;
151 crush->chooseleaf_descend_once = 0;
152 crush->chooseleaf_vary_r = 0;
153 crush->chooseleaf_stable = 0;
154 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
155 }
156 void set_tunables_bobtail() {
157 crush->choose_local_tries = 0;
158 crush->choose_local_fallback_tries = 0;
159 crush->choose_total_tries = 50;
160 crush->chooseleaf_descend_once = 1;
161 crush->chooseleaf_vary_r = 0;
162 crush->chooseleaf_stable = 0;
163 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
164 }
165 void set_tunables_firefly() {
166 crush->choose_local_tries = 0;
167 crush->choose_local_fallback_tries = 0;
168 crush->choose_total_tries = 50;
169 crush->chooseleaf_descend_once = 1;
170 crush->chooseleaf_vary_r = 1;
171 crush->chooseleaf_stable = 0;
172 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
173 }
174 void set_tunables_hammer() {
175 crush->choose_local_tries = 0;
176 crush->choose_local_fallback_tries = 0;
177 crush->choose_total_tries = 50;
178 crush->chooseleaf_descend_once = 1;
179 crush->chooseleaf_vary_r = 1;
180 crush->chooseleaf_stable = 0;
181 crush->allowed_bucket_algs =
182 (1 << CRUSH_BUCKET_UNIFORM) |
183 (1 << CRUSH_BUCKET_LIST) |
184 (1 << CRUSH_BUCKET_STRAW) |
185 (1 << CRUSH_BUCKET_STRAW2);
186 }
187 void set_tunables_jewel() {
188 crush->choose_local_tries = 0;
189 crush->choose_local_fallback_tries = 0;
190 crush->choose_total_tries = 50;
191 crush->chooseleaf_descend_once = 1;
192 crush->chooseleaf_vary_r = 1;
193 crush->chooseleaf_stable = 1;
194 crush->allowed_bucket_algs =
195 (1 << CRUSH_BUCKET_UNIFORM) |
196 (1 << CRUSH_BUCKET_LIST) |
197 (1 << CRUSH_BUCKET_STRAW) |
198 (1 << CRUSH_BUCKET_STRAW2);
199 }
200
201 void set_tunables_legacy() {
202 set_tunables_argonaut();
203 crush->straw_calc_version = 0;
204 }
205 void set_tunables_optimal() {
206 set_tunables_jewel();
207 crush->straw_calc_version = 1;
208 }
209 void set_tunables_default() {
31f18b77 210 set_tunables_jewel();
7c673cae
FG
211 crush->straw_calc_version = 1;
212 }
213
214 int get_choose_local_tries() const {
215 return crush->choose_local_tries;
216 }
217 void set_choose_local_tries(int n) {
218 crush->choose_local_tries = n;
219 }
220
221 int get_choose_local_fallback_tries() const {
222 return crush->choose_local_fallback_tries;
223 }
224 void set_choose_local_fallback_tries(int n) {
225 crush->choose_local_fallback_tries = n;
226 }
227
228 int get_choose_total_tries() const {
229 return crush->choose_total_tries;
230 }
231 void set_choose_total_tries(int n) {
232 crush->choose_total_tries = n;
233 }
234
235 int get_chooseleaf_descend_once() const {
236 return crush->chooseleaf_descend_once;
237 }
238 void set_chooseleaf_descend_once(int n) {
239 crush->chooseleaf_descend_once = !!n;
240 }
241
242 int get_chooseleaf_vary_r() const {
243 return crush->chooseleaf_vary_r;
244 }
245 void set_chooseleaf_vary_r(int n) {
246 crush->chooseleaf_vary_r = n;
247 }
248
249 int get_chooseleaf_stable() const {
250 return crush->chooseleaf_stable;
251 }
252 void set_chooseleaf_stable(int n) {
253 crush->chooseleaf_stable = n;
254 }
255
256 int get_straw_calc_version() const {
257 return crush->straw_calc_version;
258 }
259 void set_straw_calc_version(int n) {
260 crush->straw_calc_version = n;
261 }
262
263 unsigned get_allowed_bucket_algs() const {
264 return crush->allowed_bucket_algs;
265 }
266 void set_allowed_bucket_algs(unsigned n) {
267 crush->allowed_bucket_algs = n;
268 }
269
270 bool has_argonaut_tunables() const {
271 return
272 crush->choose_local_tries == 2 &&
273 crush->choose_local_fallback_tries == 5 &&
274 crush->choose_total_tries == 19 &&
275 crush->chooseleaf_descend_once == 0 &&
276 crush->chooseleaf_vary_r == 0 &&
277 crush->chooseleaf_stable == 0 &&
278 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
279 }
280 bool has_bobtail_tunables() const {
281 return
282 crush->choose_local_tries == 0 &&
283 crush->choose_local_fallback_tries == 0 &&
284 crush->choose_total_tries == 50 &&
285 crush->chooseleaf_descend_once == 1 &&
286 crush->chooseleaf_vary_r == 0 &&
287 crush->chooseleaf_stable == 0 &&
288 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
289 }
290 bool has_firefly_tunables() const {
291 return
292 crush->choose_local_tries == 0 &&
293 crush->choose_local_fallback_tries == 0 &&
294 crush->choose_total_tries == 50 &&
295 crush->chooseleaf_descend_once == 1 &&
296 crush->chooseleaf_vary_r == 1 &&
297 crush->chooseleaf_stable == 0 &&
298 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
299 }
300 bool has_hammer_tunables() const {
301 return
302 crush->choose_local_tries == 0 &&
303 crush->choose_local_fallback_tries == 0 &&
304 crush->choose_total_tries == 50 &&
305 crush->chooseleaf_descend_once == 1 &&
306 crush->chooseleaf_vary_r == 1 &&
307 crush->chooseleaf_stable == 0 &&
308 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
309 (1 << CRUSH_BUCKET_LIST) |
310 (1 << CRUSH_BUCKET_STRAW) |
311 (1 << CRUSH_BUCKET_STRAW2));
312 }
313 bool has_jewel_tunables() const {
314 return
315 crush->choose_local_tries == 0 &&
316 crush->choose_local_fallback_tries == 0 &&
317 crush->choose_total_tries == 50 &&
318 crush->chooseleaf_descend_once == 1 &&
319 crush->chooseleaf_vary_r == 1 &&
320 crush->chooseleaf_stable == 1 &&
321 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
322 (1 << CRUSH_BUCKET_LIST) |
323 (1 << CRUSH_BUCKET_STRAW) |
324 (1 << CRUSH_BUCKET_STRAW2));
325 }
326
327 bool has_optimal_tunables() const {
328 return has_jewel_tunables();
329 }
330 bool has_legacy_tunables() const {
331 return has_argonaut_tunables();
332 }
333
334 bool has_nondefault_tunables() const {
335 return
336 (crush->choose_local_tries != 2 ||
337 crush->choose_local_fallback_tries != 5 ||
338 crush->choose_total_tries != 19);
339 }
340 bool has_nondefault_tunables2() const {
341 return
342 crush->chooseleaf_descend_once != 0;
343 }
344 bool has_nondefault_tunables3() const {
345 return
346 crush->chooseleaf_vary_r != 0;
347 }
348 bool has_nondefault_tunables5() const {
349 return
350 crush->chooseleaf_stable != 0;
351 }
352
353 bool has_v2_rules() const;
354 bool has_v3_rules() const;
355 bool has_v4_buckets() const;
356 bool has_v5_rules() const;
31f18b77
FG
357 bool has_choose_args() const; // any choose_args
358 bool has_incompat_choose_args() const; // choose_args that can't be made compat
7c673cae
FG
359
360 bool is_v2_rule(unsigned ruleid) const;
361 bool is_v3_rule(unsigned ruleid) const;
362 bool is_v5_rule(unsigned ruleid) const;
363
364 string get_min_required_version() const {
365 if (has_v5_rules() || has_nondefault_tunables5())
366 return "jewel";
367 else if (has_v4_buckets())
368 return "hammer";
369 else if (has_nondefault_tunables3())
370 return "firefly";
371 else if (has_nondefault_tunables2() || has_nondefault_tunables())
372 return "bobtail";
373 else
374 return "argonaut";
375 }
376
377 // default bucket types
378 unsigned get_default_bucket_alg() const {
379 // in order of preference
380 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2))
381 return CRUSH_BUCKET_STRAW2;
382 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW))
383 return CRUSH_BUCKET_STRAW;
384 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE))
385 return CRUSH_BUCKET_TREE;
386 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST))
387 return CRUSH_BUCKET_LIST;
388 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM))
389 return CRUSH_BUCKET_UNIFORM;
390 return 0;
391 }
392
393 // bucket types
394 int get_num_type_names() const {
395 return type_map.size();
396 }
31f18b77
FG
397 int get_max_type_id() const {
398 if (type_map.empty())
399 return 0;
400 return type_map.rbegin()->first;
401 }
7c673cae
FG
402 int get_type_id(const string& name) const {
403 build_rmaps();
404 if (type_rmap.count(name))
405 return type_rmap[name];
406 return -1;
407 }
408 const char *get_type_name(int t) const {
409 std::map<int,string>::const_iterator p = type_map.find(t);
410 if (p != type_map.end())
411 return p->second.c_str();
412 return 0;
413 }
414 void set_type_name(int i, const string& name) {
415 type_map[i] = name;
416 if (have_rmaps)
417 type_rmap[name] = i;
418 }
419
420 // item/bucket names
421 bool name_exists(const string& name) const {
422 build_rmaps();
423 return name_rmap.count(name);
424 }
425 bool item_exists(int i) const {
426 return name_map.count(i);
427 }
428 int get_item_id(const string& name) const {
429 build_rmaps();
430 if (name_rmap.count(name))
431 return name_rmap[name];
432 return 0; /* hrm */
433 }
434 const char *get_item_name(int t) const {
435 std::map<int,string>::const_iterator p = name_map.find(t);
436 if (p != name_map.end())
437 return p->second.c_str();
438 return 0;
439 }
440 int set_item_name(int i, const string& name) {
441 if (!is_valid_crush_name(name))
442 return -EINVAL;
443 name_map[i] = name;
444 if (have_rmaps)
445 name_rmap[name] = i;
446 return 0;
447 }
31f18b77
FG
448 void swap_names(int a, int b) {
449 string an = name_map[a];
450 string bn = name_map[b];
451 name_map[a] = bn;
452 name_map[b] = an;
453 if (have_rmaps) {
454 name_rmap[an] = b;
455 name_rmap[bn] = a;
456 }
457 }
7c673cae
FG
458 int split_id_class(int i, int *idout, int *classout) const;
459
460 bool class_exists(const string& name) const {
461 return class_rname.count(name);
462 }
463 const char *get_class_name(int i) const {
224ce89b 464 auto p = class_name.find(i);
7c673cae
FG
465 if (p != class_name.end())
466 return p->second.c_str();
467 return 0;
468 }
469 int get_class_id(const string& name) const {
224ce89b 470 auto p = class_rname.find(name);
7c673cae
FG
471 if (p != class_rname.end())
472 return p->second;
473 else
474 return -EINVAL;
475 }
476 int remove_class_name(const string& name) {
224ce89b 477 auto p = class_rname.find(name);
7c673cae
FG
478 if (p == class_rname.end())
479 return -ENOENT;
480 int class_id = p->second;
224ce89b 481 auto q = class_name.find(class_id);
7c673cae
FG
482 if (q == class_name.end())
483 return -ENOENT;
484 class_rname.erase(name);
485 class_name.erase(class_id);
486 return 0;
487 }
224ce89b 488
224ce89b
WB
489 int32_t _alloc_class_id() const;
490
7c673cae
FG
491 int get_or_create_class_id(const string& name) {
492 int c = get_class_id(name);
493 if (c < 0) {
224ce89b 494 int i = _alloc_class_id();
7c673cae
FG
495 class_name[i] = name;
496 class_rname[name] = i;
497 return i;
498 } else {
499 return c;
500 }
501 }
502
503 const char *get_item_class(int t) const {
504 std::map<int,int>::const_iterator p = class_map.find(t);
505 if (p == class_map.end())
506 return 0;
507 return get_class_name(p->second);
508 }
509 int set_item_class(int i, const string& name) {
510 if (!is_valid_crush_name(name))
511 return -EINVAL;
512 class_map[i] = get_or_create_class_id(name);
513 return 0;
514 }
515 int set_item_class(int i, int c) {
516 class_map[i] = c;
517 return c;
518 }
224ce89b
WB
519 void get_devices_by_class(const string &name, set<int> *devices) const {
520 assert(devices);
521 devices->clear();
522 if (!class_exists(name)) {
523 return;
524 }
525 auto cid = get_class_id(name);
526 for (auto& p : class_map) {
527 if (p.first >= 0 && p.second == cid) {
528 devices->insert(p.first);
529 }
530 }
531 }
532 void class_remove_item(int i) {
533 auto it = class_map.find(i);
534 if (it == class_map.end()) {
535 return;
536 }
537 class_map.erase(it);
538 }
7c673cae
FG
539 int can_rename_item(const string& srcname,
540 const string& dstname,
541 ostream *ss) const;
542 int rename_item(const string& srcname,
543 const string& dstname,
544 ostream *ss);
545 int can_rename_bucket(const string& srcname,
546 const string& dstname,
547 ostream *ss) const;
548 int rename_bucket(const string& srcname,
549 const string& dstname,
550 ostream *ss);
551
552 // rule names
b5b8bbf5
FG
553 int rename_rule(const string& srcname,
554 const string& dstname,
555 ostream *ss);
7c673cae
FG
556 bool rule_exists(string name) const {
557 build_rmaps();
558 return rule_name_rmap.count(name);
559 }
560 int get_rule_id(string name) const {
561 build_rmaps();
562 if (rule_name_rmap.count(name))
563 return rule_name_rmap[name];
564 return -ENOENT;
565 }
566 const char *get_rule_name(int t) const {
567 std::map<int,string>::const_iterator p = rule_name_map.find(t);
568 if (p != rule_name_map.end())
569 return p->second.c_str();
570 return 0;
571 }
572 void set_rule_name(int i, const string& name) {
573 rule_name_map[i] = name;
574 if (have_rmaps)
575 rule_name_rmap[name] = i;
576 }
c07f9fc5
FG
577 bool is_shadow_item(int id) const {
578 const char *name = get_item_name(id);
579 return name && !is_valid_crush_name(name);
580 }
7c673cae
FG
581
582
583 /**
584 * find tree nodes referenced by rules by a 'take' command
585 *
586 * Note that these may not be parentless roots.
587 */
3efd9988 588 void find_takes(set<int> *roots) const;
28e407b8 589 void find_takes_by_rule(int rule, set<int> *roots) const;
7c673cae
FG
590
591 /**
592 * find tree roots
593 *
594 * These are parentless nodes in the map.
595 */
3efd9988 596 void find_roots(set<int> *roots) const;
7c673cae 597
c07f9fc5
FG
598
599 /**
600 * find tree roots that contain shadow (device class) items only
601 */
3efd9988 602 void find_shadow_roots(set<int> *roots) const {
c07f9fc5 603 set<int> all;
3efd9988 604 find_roots(&all);
c07f9fc5
FG
605 for (auto& p: all) {
606 if (is_shadow_item(p)) {
3efd9988 607 roots->insert(p);
c07f9fc5
FG
608 }
609 }
610 }
611
224ce89b
WB
612 /**
613 * find tree roots that are not shadow (device class) items
614 *
615 * These are parentless nodes in the map that are not shadow
616 * items for device classes.
617 */
3efd9988 618 void find_nonshadow_roots(set<int> *roots) const {
c07f9fc5 619 set<int> all;
3efd9988 620 find_roots(&all);
c07f9fc5
FG
621 for (auto& p: all) {
622 if (!is_shadow_item(p)) {
3efd9988 623 roots->insert(p);
c07f9fc5
FG
624 }
625 }
626 }
224ce89b 627
7c673cae
FG
628 /**
629 * see if an item is contained within a subtree
630 *
631 * @param root haystack
632 * @param item needle
633 * @return true if the item is located beneath the given node
634 */
635 bool subtree_contains(int root, int item) const;
636
637private:
638 /**
639 * search for an item in any bucket
640 *
641 * @param i item
642 * @return true if present
643 */
644 bool _search_item_exists(int i) const;
645public:
646
647 /**
648 * see if item is located where we think it is
649 *
650 * This verifies that the given item is located at a particular
651 * location in the hierarchy. However, that check is imprecise; we
652 * are actually verifying that the most specific location key/value
653 * is correct. For example, if loc specifies that rack=foo and
654 * host=bar, it will verify that host=bar is correct; any placement
655 * above that level in the hierarchy is ignored. This matches the
656 * semantics for insert_item().
657 *
658 * @param cct cct
659 * @param item item id
660 * @param loc location to check (map of type to bucket names)
661 * @param weight optional pointer to weight of item at that location
662 * @return true if item is at specified location
663 */
664 bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, int *iweight);
665 bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, float *weight) {
666 int iweight;
667 bool ret = check_item_loc(cct, item, loc, &iweight);
668 if (weight)
669 *weight = (float)iweight / (float)0x10000;
670 return ret;
671 }
672
673
674 /**
675 * returns the (type, name) of the parent bucket of id
676 *
677 * FIXME: ambiguous for items that occur multiple times in the map
678 */
679 pair<string,string> get_immediate_parent(int id, int *ret = NULL);
c07f9fc5 680
7c673cae
FG
681 int get_immediate_parent_id(int id, int *parent) const;
682
31f18b77
FG
683 /**
684 * return ancestor of the given type, or 0 if none
28e407b8 685 * can pass in a specific crush **rule** to return ancestor from that rule only
31f18b77
FG
686 * (parent is always a bucket and thus <0)
687 */
28e407b8 688 int get_parent_of_type(int id, int type, int rule = -1) const;
31f18b77 689
7c673cae
FG
690 /**
691 * get the fully qualified location of a device by successively finding
692 * parents beginning at ID and ending at highest type number specified in
693 * the CRUSH map which assumes that if device foo is under device bar, the
694 * type_id of foo < bar where type_id is the integer specified in the CRUSH map
695 *
696 * returns the location in the form of (type=foo) where type is a type of bucket
697 * specified in the CRUSH map and foo is a name specified in the CRUSH map
698 */
699 map<string, string> get_full_location(int id);
700
701 /*
702 * identical to get_full_location(int id) although it returns the type/name
703 * pairs in the order they occur in the hierarchy.
704 *
705 * returns -ENOENT if id is not found.
706 */
707 int get_full_location_ordered(int id, vector<pair<string, string> >& path);
708
31f18b77
FG
709 /*
710 * identical to get_full_location_ordered(int id, vector<pair<string, string> >& path),
711 * although it returns a concatenated string with the type/name pairs in descending
712 * hierarchical order with format key1=val1,key2=val2.
713 *
714 * returns the location in descending hierarchy as a string.
715 */
716 string get_full_location_ordered_string(int id);
717
7c673cae
FG
718 /**
719 * returns (type_id, type) of all parent buckets between id and
720 * default, can be used to check for anomalous CRUSH maps
721 */
722 map<int, string> get_parent_hierarchy(int id);
723
724 /**
725 * enumerate immediate children of given node
726 *
727 * @param id parent bucket or device id
728 * @return number of items, or error
729 */
730 int get_children(int id, list<int> *children);
28e407b8
AA
731 void get_children_of_type(int id,
732 int type,
733 set<int> *children,
734 bool exclude_shadow = true) const;
7c673cae 735
94b18763
FG
736 /**
737 * get failure-domain type of a specific crush rule
738 * @param rule_id crush rule id
739 * @return type of failure-domain or a negative errno on error.
740 */
741 int get_rule_failure_domain(int rule_id);
742
31f18b77
FG
743 /**
744 * enumerate leaves(devices) of given node
745 *
746 * @param name parent bucket name
747 * @return 0 on success or a negative errno on error.
748 */
749 int get_leaves(const string &name, set<int> *leaves);
750 int _get_leaves(int id, list<int> *leaves); // worker
751
7c673cae
FG
752 /**
753 * insert an item into the map at a specific position
754 *
755 * Add an item as a specific location of the hierarchy.
756 * Specifically, we look for the most specific location constraint
757 * for which a bucket already exists, and then create intervening
758 * buckets beneath that in order to place the item.
759 *
760 * Note that any location specifiers *above* the most specific match
761 * are ignored. For example, if we specify that osd.12 goes in
762 * host=foo, rack=bar, and row=baz, and rack=bar is the most
763 * specific match, we will create host=foo beneath that point and
764 * put osd.12 inside it. However, we will not verify that rack=bar
765 * is beneath row=baz or move it.
766 *
767 * In short, we will build out a hierarchy, and move leaves around,
768 * but not adjust the hierarchy's internal structure. Yet.
769 *
770 * If the item is already present in the map, we will return EEXIST.
771 * If the location key/value pairs are nonsensical
772 * (rack=nameofdevice), or location specifiers that do not attach us
773 * to any existing part of the hierarchy, we will return EINVAL.
774 *
775 * @param cct cct
776 * @param id item id
777 * @param weight item weight
778 * @param name item name
779 * @param loc location (map of type to bucket names)
780 * @return 0 for success, negative on error
781 */
782 int insert_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc);
783
784 /**
785 * move a bucket in the hierarchy to the given location
786 *
787 * This has the same location and ancestor creation behavior as
788 * insert_item(), but will relocate the specified existing bucket.
789 *
790 * @param cct cct
791 * @param id bucket id
792 * @param loc location (map of type to bucket names)
793 * @return 0 for success, negative on error
794 */
795 int move_bucket(CephContext *cct, int id, const map<string,string>& loc);
796
31f18b77
FG
797 /**
798 * swap bucket contents of two buckets without touching bucket ids
799 *
800 * @param cct cct
801 * @param src bucket a
802 * @param dst bucket b
803 * @return 0 for success, negative on error
804 */
805 int swap_bucket(CephContext *cct, int src, int dst);
806
7c673cae
FG
807 /**
808 * add a link to an existing bucket in the hierarchy to the new location
809 *
810 * This has the same location and ancestor creation behavior as
811 * insert_item(), but will add a new link to the specified existing
812 * bucket.
813 *
814 * @param cct cct
815 * @param id bucket id
816 * @param loc location (map of type to bucket names)
817 * @return 0 for success, negative on error
818 */
819 int link_bucket(CephContext *cct, int id, const map<string,string>& loc);
820
821 /**
822 * add or update an item's position in the map
823 *
824 * This is analogous to insert_item, except we will move an item if
825 * it is already present.
826 *
827 * @param cct cct
828 * @param id item id
829 * @param weight item weight
830 * @param name item name
831 * @param loc location (map of type to bucket names)
832 * @return 0 for no change, 1 for successful change, negative on error
833 */
834 int update_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc);
835
836 /**
837 * create or move an item, but do not adjust its weight if it already exists
838 *
839 * @param cct cct
840 * @param item item id
841 * @param weight initial item weight (if we need to create it)
842 * @param name item name
843 * @param loc location (map of type to bucket names)
844 * @return 0 for no change, 1 for successful change, negative on error
845 */
846 int create_or_move_item(CephContext *cct, int item, float weight, string name,
847 const map<string,string>& loc);
848
849 /**
850 * remove all instances of an item from the map
851 *
852 * @param cct cct
853 * @param id item id to remove
854 * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty)
855 * @return 0 on success, negative on error
856 */
857 int remove_item(CephContext *cct, int id, bool unlink_only);
858
859 /**
860 * recursively remove buckets starting at item and stop removing
861 * when a bucket is in use.
862 *
863 * @param item id to remove
7c673cae
FG
864 * @return 0 on success, negative on error
865 */
35e4c445 866 int remove_root(int item);
7c673cae
FG
867
868 /**
869 * remove all instances of an item nested beneath a certain point from the map
870 *
871 * @param cct cct
872 * @param id item id to remove
873 * @param ancestor ancestor item id under which to search for id
874 * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty)
875 * @return 0 on success, negative on error
876 */
877private:
878 bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only);
879 int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
880 bool _bucket_is_in_use(int id);
881public:
882 int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
883
884 /**
885 * calculate the locality/distance from a given id to a crush location map
886 *
887 * Specifically, we look for the lowest-valued type for which the
888 * location of id matches that described in loc.
889 *
890 * @param cct cct
891 * @param id the existing id in the map
892 * @param loc a set of key=value pairs describing a location in the hierarchy
893 */
894 int get_common_ancestor_distance(CephContext *cct, int id,
895 const std::multimap<string,string>& loc);
896
897 /**
898 * parse a set of key/value pairs out of a string vector
899 *
900 * These are used to describe a location in the CRUSH hierarchy.
901 *
902 * @param args list of strings (each key= or key=value)
903 * @param ploc pointer to a resulting location map or multimap
904 */
905 static int parse_loc_map(const std::vector<string>& args,
906 std::map<string,string> *ploc);
907 static int parse_loc_multimap(const std::vector<string>& args,
908 std::multimap<string,string> *ploc);
909
910 /**
911 * get an item's weight
912 *
913 * Will return the weight for the first instance it finds.
914 *
915 * @param id item id to check
916 * @return weight of item
917 */
918 int get_item_weight(int id) const;
919 float get_item_weightf(int id) const {
920 return (float)get_item_weight(id) / (float)0x10000;
921 }
922 int get_item_weight_in_loc(int id, const map<string,string> &loc);
923 float get_item_weightf_in_loc(int id, const map<string,string> &loc) {
924 return (float)get_item_weight_in_loc(id, loc) / (float)0x10000;
925 }
926
224ce89b
WB
/// Check that a float weight fits the 16.16 fixed-point int form.
///
/// The weight is scaled by 0x10000 and must truncate to a value in
/// [0, INT_MAX]. The comparison is done in floating point: converting
/// an out-of-range, negative or NaN float to an integer type is
/// undefined behaviour, so the value is never converted before the
/// range is known to be safe.
///
/// @param weight weight as a float
/// @return 0 if representable; -EOVERFLOW if the scaled weight is too
///         large, is -1 or below, or is NaN
int validate_weightf(float weight) {
  const double scaled = static_cast<double>(weight * 0x10000);
  const double limit =
    static_cast<double>(std::numeric_limits<int>::max()) + 1.0;
  // written as a negated conjunction so that NaN is rejected as well
  if (!(scaled > -1.0 && scaled < limit)) {
    return -EOVERFLOW;
  }
  return 0;
}
7c673cae
FG
934 int adjust_item_weight(CephContext *cct, int id, int weight);
935 int adjust_item_weightf(CephContext *cct, int id, float weight) {
224ce89b
WB
936 int r = validate_weightf(weight);
937 if (r < 0) {
938 return r;
939 }
7c673cae
FG
940 return adjust_item_weight(cct, id, (int)(weight * (float)0x10000));
941 }
942 int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc);
943 int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map<string,string>& loc) {
224ce89b
WB
944 int r = validate_weightf(weight);
945 if (r < 0) {
946 return r;
947 }
7c673cae
FG
948 return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc);
949 }
950 void reweight(CephContext *cct);
951
952 int adjust_subtree_weight(CephContext *cct, int id, int weight);
953 int adjust_subtree_weightf(CephContext *cct, int id, float weight) {
224ce89b
WB
954 int r = validate_weightf(weight);
955 if (r < 0) {
956 return r;
957 }
7c673cae
FG
958 return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000));
959 }
960
961 /// check if item id is present in the map hierarchy
962 bool check_item_present(int id) const;
963
964
965 /*** devices ***/
966 int get_max_devices() const {
967 if (!crush) return 0;
968 return crush->max_devices;
969 }
970
971
972 /*** rules ***/
973private:
974 crush_rule *get_rule(unsigned ruleno) const {
975 if (!crush) return (crush_rule *)(-ENOENT);
976 if (ruleno >= crush->max_rules)
977 return 0;
978 return crush->rules[ruleno];
979 }
980 crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const {
981 crush_rule *n = get_rule(ruleno);
982 if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL);
983 if (step >= n->len) return (crush_rule_step *)(-EINVAL);
984 return &n->steps[step];
985 }
986
987public:
988 /* accessors */
989 int get_max_rules() const {
990 if (!crush) return 0;
991 return crush->max_rules;
992 }
993 bool rule_exists(unsigned ruleno) const {
994 if (!crush) return false;
995 if (ruleno < crush->max_rules &&
996 crush->rules[ruleno] != NULL)
997 return true;
998 return false;
999 }
3efd9988
FG
1000 bool rule_has_take(unsigned ruleno, int take) const {
1001 if (!crush) return false;
1002 crush_rule *rule = get_rule(ruleno);
1003 for (unsigned i = 0; i < rule->len; ++i) {
1004 if (rule->steps[i].op == CRUSH_RULE_TAKE &&
1005 rule->steps[i].arg1 == take) {
1006 return true;
1007 }
1008 }
1009 return false;
1010 }
7c673cae
FG
1011 int get_rule_len(unsigned ruleno) const {
1012 crush_rule *r = get_rule(ruleno);
1013 if (IS_ERR(r)) return PTR_ERR(r);
1014 return r->len;
1015 }
1016 int get_rule_mask_ruleset(unsigned ruleno) const {
1017 crush_rule *r = get_rule(ruleno);
1018 if (IS_ERR(r)) return -1;
1019 return r->mask.ruleset;
1020 }
1021 int get_rule_mask_type(unsigned ruleno) const {
1022 crush_rule *r = get_rule(ruleno);
1023 if (IS_ERR(r)) return -1;
1024 return r->mask.type;
1025 }
1026 int get_rule_mask_min_size(unsigned ruleno) const {
1027 crush_rule *r = get_rule(ruleno);
1028 if (IS_ERR(r)) return -1;
1029 return r->mask.min_size;
1030 }
1031 int get_rule_mask_max_size(unsigned ruleno) const {
1032 crush_rule *r = get_rule(ruleno);
1033 if (IS_ERR(r)) return -1;
1034 return r->mask.max_size;
1035 }
1036 int get_rule_op(unsigned ruleno, unsigned step) const {
1037 crush_rule_step *s = get_rule_step(ruleno, step);
1038 if (IS_ERR(s)) return PTR_ERR(s);
1039 return s->op;
1040 }
1041 int get_rule_arg1(unsigned ruleno, unsigned step) const {
1042 crush_rule_step *s = get_rule_step(ruleno, step);
1043 if (IS_ERR(s)) return PTR_ERR(s);
1044 return s->arg1;
1045 }
1046 int get_rule_arg2(unsigned ruleno, unsigned step) const {
1047 crush_rule_step *s = get_rule_step(ruleno, step);
1048 if (IS_ERR(s)) return PTR_ERR(s);
1049 return s->arg2;
1050 }
1051
3efd9988
FG
1052private:
1053 float _get_take_weight_osd_map(int root, map<int,float> *pmap) const;
1054 void _normalize_weight_map(float sum, const map<int,float>& m,
1055 map<int,float> *pmap) const;
1056
1057public:
7c673cae
FG
1058 /**
1059 * calculate a map of osds to weights for a given rule
1060 *
1061 * Generate a map of which OSDs get how much relative weight for a
1062 * given rule.
1063 *
1064 * @param ruleno [in] rule id
1065 * @param pmap [out] map of osd to weight
1066 * @return 0 for success, or negative error code
1067 */
3efd9988
FG
1068 int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap) const;
1069
1070 /**
1071 * calculate a map of osds to weights for a given starting root
1072 *
1073 * Generate a map of which OSDs get how much relative weight for a
1074 * given starting root
1075 *
1076 * @param root node
1077 * @param pmap [out] map of osd to weight
1078 * @return 0 for success, or negative error code
1079 */
1080 int get_take_weight_osd_map(int root, map<int,float> *pmap) const;
7c673cae
FG
1081
1082 /* modifiers */
c07f9fc5
FG
1083
1084 int add_rule(int ruleno, int len, int type, int minsize, int maxsize) {
7c673cae 1085 if (!crush) return -ENOENT;
c07f9fc5 1086 crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize);
7c673cae
FG
1087 assert(n);
1088 ruleno = crush_add_rule(crush, n, ruleno);
1089 return ruleno;
1090 }
1091 int set_rule_mask_max_size(unsigned ruleno, int max_size) {
1092 crush_rule *r = get_rule(ruleno);
1093 if (IS_ERR(r)) return -1;
1094 return r->mask.max_size = max_size;
1095 }
1096 int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) {
1097 if (!crush) return -ENOENT;
1098 crush_rule *n = get_rule(ruleno);
1099 if (!n) return -1;
1100 crush_rule_set_step(n, step, op, arg1, arg2);
1101 return 0;
1102 }
1103 int set_rule_step_take(unsigned ruleno, unsigned step, int val) {
1104 return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0);
1105 }
1106 int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) {
1107 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0);
1108 }
1109 int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) {
1110 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0);
1111 }
1112 int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) {
1113 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0);
1114 }
1115 int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) {
1116 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0);
1117 }
1118 int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) {
1119 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0);
1120 }
1121 int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) {
1122 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0);
1123 }
1124 int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) {
1125 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type);
1126 }
1127 int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) {
1128 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type);
1129 }
1130 int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) {
1131 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type);
1132 }
1133 int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) {
1134 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type);
1135 }
1136 int set_rule_step_emit(unsigned ruleno, unsigned step) {
1137 return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
1138 }
1139
31f18b77
FG
1140 int add_simple_rule(
1141 string name, string root_name, string failure_domain_type,
224ce89b 1142 string device_class,
31f18b77
FG
1143 string mode, int rule_type, ostream *err = 0);
1144
7c673cae 1145 /**
31f18b77 1146 * @param rno rule[set] id to use, -1 to pick the lowest available
7c673cae 1147 */
31f18b77
FG
1148 int add_simple_rule_at(
1149 string name, string root_name,
224ce89b 1150 string failure_domain_type, string device_class, string mode,
31f18b77 1151 int rule_type, int rno, ostream *err = 0);
7c673cae
FG
1152
1153 int remove_rule(int ruleno);
1154
1155
1156 /** buckets **/
7c673cae
FG
1157 const crush_bucket *get_bucket(int id) const {
1158 if (!crush)
1159 return (crush_bucket *)(-EINVAL);
1160 unsigned int pos = (unsigned int)(-1 - id);
1161 unsigned int max_buckets = crush->max_buckets;
1162 if (pos >= max_buckets)
1163 return (crush_bucket *)(-ENOENT);
1164 crush_bucket *ret = crush->buckets[pos];
1165 if (ret == NULL)
1166 return (crush_bucket *)(-ENOENT);
1167 return ret;
1168 }
c07f9fc5 1169private:
7c673cae
FG
1170 crush_bucket *get_bucket(int id) {
1171 if (!crush)
1172 return (crush_bucket *)(-EINVAL);
1173 unsigned int pos = (unsigned int)(-1 - id);
1174 unsigned int max_buckets = crush->max_buckets;
1175 if (pos >= max_buckets)
1176 return (crush_bucket *)(-ENOENT);
1177 crush_bucket *ret = crush->buckets[pos];
1178 if (ret == NULL)
1179 return (crush_bucket *)(-ENOENT);
1180 return ret;
1181 }
1182 /**
1183 * detach a bucket from its parent and adjust the parent weight
1184 *
1185 * returns the weight of the detached bucket
1186 **/
c07f9fc5 1187 int detach_bucket(CephContext *cct, int item);
7c673cae
FG
1188
1189public:
1190 int get_max_buckets() const {
1191 if (!crush) return -EINVAL;
1192 return crush->max_buckets;
1193 }
1194 int get_next_bucket_id() const {
1195 if (!crush) return -EINVAL;
1196 return crush_get_next_bucket_id(crush);
1197 }
1198 bool bucket_exists(int id) const {
1199 const crush_bucket *b = get_bucket(id);
1200 if (IS_ERR(b))
1201 return false;
1202 return true;
1203 }
1204 int get_bucket_weight(int id) const {
1205 const crush_bucket *b = get_bucket(id);
1206 if (IS_ERR(b)) return PTR_ERR(b);
1207 return b->weight;
1208 }
1209 float get_bucket_weightf(int id) const {
1210 const crush_bucket *b = get_bucket(id);
1211 if (IS_ERR(b)) return 0;
1212 return b->weight / (float)0x10000;
1213 }
1214 int get_bucket_type(int id) const {
1215 const crush_bucket *b = get_bucket(id);
1216 if (IS_ERR(b)) return PTR_ERR(b);
1217 return b->type;
1218 }
1219 int get_bucket_alg(int id) const {
1220 const crush_bucket *b = get_bucket(id);
1221 if (IS_ERR(b)) return PTR_ERR(b);
1222 return b->alg;
1223 }
1224 int get_bucket_hash(int id) const {
1225 const crush_bucket *b = get_bucket(id);
1226 if (IS_ERR(b)) return PTR_ERR(b);
1227 return b->hash;
1228 }
1229 int get_bucket_size(int id) const {
1230 const crush_bucket *b = get_bucket(id);
1231 if (IS_ERR(b)) return PTR_ERR(b);
1232 return b->size;
1233 }
1234 int get_bucket_item(int id, int pos) const {
1235 const crush_bucket *b = get_bucket(id);
1236 if (IS_ERR(b)) return PTR_ERR(b);
1237 if ((__u32)pos >= b->size)
1238 return PTR_ERR(b);
1239 return b->items[pos];
1240 }
1241 int get_bucket_item_weight(int id, int pos) const {
1242 const crush_bucket *b = get_bucket(id);
1243 if (IS_ERR(b)) return PTR_ERR(b);
1244 return crush_get_bucket_item_weight(b, pos);
1245 }
1246 float get_bucket_item_weightf(int id, int pos) const {
1247 const crush_bucket *b = get_bucket(id);
1248 if (IS_ERR(b)) return 0;
1249 return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000;
1250 }
1251
1252 /* modifiers */
1253 int add_bucket(int bucketno, int alg, int hash, int type, int size,
c07f9fc5 1254 int *items, int *weights, int *idout);
31f18b77
FG
1255 int bucket_add_item(crush_bucket *bucket, int item, int weight);
1256 int bucket_remove_item(struct crush_bucket *bucket, int item);
1257 int bucket_adjust_item_weight(CephContext *cct, struct crush_bucket *bucket, int item, int weight);
1258
7c673cae
FG
1259 void finalize() {
1260 assert(crush);
1261 crush_finalize(crush);
3a9019d9
FG
1262 if (!name_map.empty() &&
1263 name_map.rbegin()->first >= crush->max_devices) {
1264 crush->max_devices = name_map.rbegin()->first + 1;
1265 }
3efd9988 1266 have_uniform_rules = !has_legacy_rule_ids();
7c673cae 1267 }
3efd9988 1268 int bucket_set_alg(int id, int alg);
7c673cae 1269
224ce89b 1270 int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
c07f9fc5 1271 int remove_device_class(CephContext *cct, int id, ostream *ss);
d2e6a577
FG
1272 int device_class_clone(
1273 int original, int device_class,
1274 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
1275 const std::set<int32_t>& used_ids,
35e4c445
FG
1276 int *clone,
1277 map<int,map<int,vector<int>>> *cmap_item_weight);
1278 int rename_class(const string& srcname, const string& dstname);
d2e6a577
FG
1279 int populate_classes(
1280 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
b5b8bbf5 1281 int get_rules_by_class(const string &class_name, set<int> *rules);
3efd9988 1282 int get_rules_by_osd(int osd, set<int> *rules);
d2e6a577
FG
1283 bool _class_is_dead(int class_id);
1284 void cleanup_dead_classes();
7c673cae
FG
1285 int rebuild_roots_with_classes();
1286 /* remove unused roots generated for class devices */
35e4c445 1287 int trim_roots_with_class();
7c673cae
FG
1288
1289 void start_choose_profile() {
1290 free(crush->choose_tries);
1291 /*
1292 * the original choose_total_tries value was off by one (it
1293 * counted "retries" and not "tries"). add one to alloc.
1294 */
c07f9fc5
FG
1295 crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries),
1296 (crush->choose_total_tries + 1));
7c673cae
FG
1297 memset(crush->choose_tries, 0,
1298 sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
1299 }
1300 void stop_choose_profile() {
1301 free(crush->choose_tries);
1302 crush->choose_tries = 0;
1303 }
1304
1305 int get_choose_profile(__u32 **vec) {
1306 if (crush->choose_tries) {
1307 *vec = crush->choose_tries;
1308 return crush->choose_total_tries;
1309 }
1310 return 0;
1311 }
1312
1313
1314 void set_max_devices(int m) {
1315 crush->max_devices = m;
1316 }
1317
1318 int find_rule(int ruleset, int type, int size) const {
1319 if (!crush) return -1;
181888fb
FG
1320 if (have_uniform_rules &&
1321 ruleset < (int)crush->max_rules &&
1322 crush->rules[ruleset] &&
1323 crush->rules[ruleset]->mask.type == type &&
1324 crush->rules[ruleset]->mask.min_size <= size &&
1325 crush->rules[ruleset]->mask.max_size >= size) {
1326 return ruleset;
31f18b77 1327 }
181888fb 1328 return crush_find_rule(crush, ruleset, type, size);
7c673cae
FG
1329 }
1330
d2e6a577 1331 bool ruleset_exists(const int ruleset) const {
7c673cae
FG
1332 for (size_t i = 0; i < crush->max_rules; ++i) {
1333 if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) {
1334 return true;
1335 }
1336 }
1337
1338 return false;
1339 }
1340
1341 /**
1342 * Return the lowest numbered ruleset of type `type`
1343 *
3efd9988 1344 * @returns a ruleset ID, or -1 if no matching rules found.
7c673cae
FG
1345 */
1346 int find_first_ruleset(int type) const {
1347 int result = -1;
1348
1349 for (size_t i = 0; i < crush->max_rules; ++i) {
1350 if (crush->rules[i]
1351 && crush->rules[i]->mask.type == type
1352 && (crush->rules[i]->mask.ruleset < result || result == -1)) {
1353 result = crush->rules[i]->mask.ruleset;
1354 }
1355 }
1356
1357 return result;
1358 }
1359
c07f9fc5
FG
1360 bool have_choose_args(int64_t choose_args_index) const {
1361 return choose_args.count(choose_args_index);
1362 }
1363
1364 crush_choose_arg_map choose_args_get_with_fallback(
1365 int64_t choose_args_index) const {
1366 auto i = choose_args.find(choose_args_index);
1367 if (i == choose_args.end()) {
1368 i = choose_args.find(DEFAULT_CHOOSE_ARGS);
1369 }
1370 if (i == choose_args.end()) {
1371 crush_choose_arg_map arg_map;
1372 arg_map.args = NULL;
1373 arg_map.size = 0;
1374 return arg_map;
1375 } else {
1376 return i->second;
1377 }
1378 }
1379 crush_choose_arg_map choose_args_get(int64_t choose_args_index) const {
7c673cae
FG
1380 auto i = choose_args.find(choose_args_index);
1381 if (i == choose_args.end()) {
1382 crush_choose_arg_map arg_map;
1383 arg_map.args = NULL;
1384 arg_map.size = 0;
1385 return arg_map;
1386 } else {
1387 return i->second;
1388 }
1389 }
1390
1391 void destroy_choose_args(crush_choose_arg_map arg_map) {
1392 for (__u32 i = 0; i < arg_map.size; i++) {
1393 crush_choose_arg *arg = &arg_map.args[i];
28e407b8 1394 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
7c673cae
FG
1395 crush_weight_set *weight_set = &arg->weight_set[j];
1396 free(weight_set->weights);
1397 }
1398 if (arg->weight_set)
1399 free(arg->weight_set);
1400 if (arg->ids)
1401 free(arg->ids);
1402 }
1403 free(arg_map.args);
1404 }
c07f9fc5
FG
1405
1406 void create_choose_args(int64_t id, int positions) {
1407 if (choose_args.count(id))
1408 return;
1409 assert(positions);
1410 auto &cmap = choose_args[id];
1411 cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg),
1412 crush->max_buckets);
1413 cmap.size = crush->max_buckets;
1414 for (int bidx=0; bidx < crush->max_buckets; ++bidx) {
1415 crush_bucket *b = crush->buckets[bidx];
1416 auto &carg = cmap.args[bidx];
1417 carg.ids = NULL;
1418 carg.ids_size = 0;
1419 if (b && b->alg == CRUSH_BUCKET_STRAW2) {
1420 crush_bucket_straw2 *sb = (crush_bucket_straw2*)b;
28e407b8 1421 carg.weight_set_positions = positions;
c07f9fc5 1422 carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set),
28e407b8 1423 carg.weight_set_positions);
c07f9fc5
FG
1424 // initialize with canonical weights
1425 for (int pos = 0; pos < positions; ++pos) {
1426 carg.weight_set[pos].size = b->size;
1427 carg.weight_set[pos].weights = (__u32*)calloc(4, b->size);
1428 for (unsigned i = 0; i < b->size; ++i) {
1429 carg.weight_set[pos].weights[i] = sb->item_weights[i];
1430 }
1431 }
1432 } else {
1433 carg.weight_set = NULL;
28e407b8 1434 carg.weight_set_positions = 0;
c07f9fc5
FG
1435 }
1436 }
1437 }
1438
1439 void rm_choose_args(int64_t id) {
1440 auto p = choose_args.find(id);
1441 if (p != choose_args.end()) {
1442 destroy_choose_args(p->second);
1443 choose_args.erase(p);
1444 }
1445 }
1446
7c673cae
FG
1447 void choose_args_clear() {
1448 for (auto w : choose_args)
1449 destroy_choose_args(w.second);
1450 choose_args.clear();
1451 }
1452
28e407b8
AA
1453 // remove choose_args for buckets that no longer exist, create them for new buckets
1454 void update_choose_args(CephContext *cct);
1455
c07f9fc5
FG
1456 // adjust choose_args_map weight, preserving the hierarchical summation
1457 // property. used by callers optimizing layouts by tweaking weights.
1458 int _choose_args_adjust_item_weight_in_bucket(
1459 CephContext *cct,
1460 crush_choose_arg_map cmap,
1461 int bucketid,
1462 int id,
1463 const vector<int>& weight,
1464 ostream *ss);
1465 int choose_args_adjust_item_weight(
1466 CephContext *cct,
1467 crush_choose_arg_map cmap,
1468 int id, const vector<int>& weight,
1469 ostream *ss);
1470 int choose_args_adjust_item_weightf(
1471 CephContext *cct,
1472 crush_choose_arg_map cmap,
1473 int id, const vector<double>& weightf,
1474 ostream *ss) {
1475 vector<int> weight(weightf.size());
1476 for (unsigned i = 0; i < weightf.size(); ++i) {
1477 weight[i] = (int)(weightf[i] * (float)0x10000);
1478 }
1479 return choose_args_adjust_item_weight(cct, cmap, id, weight, ss);
1480 }
1481
1482 int get_choose_args_positions(crush_choose_arg_map cmap) {
1483 // infer positions from other buckets
1484 for (unsigned j = 0; j < cmap.size; ++j) {
28e407b8
AA
1485 if (cmap.args[j].weight_set_positions) {
1486 return cmap.args[j].weight_set_positions;
c07f9fc5
FG
1487 }
1488 }
1489 return 1;
1490 }
1491
7c673cae
FG
1492 template<typename WeightVector>
1493 void do_rule(int rule, int x, vector<int>& out, int maxout,
1494 const WeightVector& weight,
1495 uint64_t choose_args_index) const {
1496 int rawout[maxout];
1497 char work[crush_work_size(crush, maxout)];
1498 crush_init_workspace(crush, work);
c07f9fc5
FG
1499 crush_choose_arg_map arg_map = choose_args_get_with_fallback(
1500 choose_args_index);
7c673cae
FG
1501 int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0],
1502 weight.size(), work, arg_map.args);
1503 if (numrep < 0)
1504 numrep = 0;
1505 out.resize(numrep);
1506 for (int i=0; i<numrep; i++)
1507 out[i] = rawout[i];
1508 }
1509
1510 int _choose_type_stack(
1511 CephContext *cct,
1512 const vector<pair<int,int>>& stack,
1513 const set<int>& overfull,
1514 const vector<int>& underfull,
1515 const vector<int>& orig,
1516 vector<int>::const_iterator& i,
1517 set<int>& used,
1518 vector<int> *pw) const;
1519
1520 int try_remap_rule(
1521 CephContext *cct,
1522 int rule,
1523 int maxout,
1524 const set<int>& overfull,
1525 const vector<int>& underfull,
1526 const vector<int>& orig,
1527 vector<int> *out) const;
1528
1529 bool check_crush_rule(int ruleset, int type, int size, ostream& ss) {
1530 assert(crush);
1531
1532 __u32 i;
1533 for (i = 0; i < crush->max_rules; i++) {
1534 if (crush->rules[i] &&
1535 crush->rules[i]->mask.ruleset == ruleset &&
1536 crush->rules[i]->mask.type == type) {
1537
1538 if (crush->rules[i]->mask.min_size <= size &&
1539 crush->rules[i]->mask.max_size >= size) {
1540 return true;
1541 } else if (size < crush->rules[i]->mask.min_size) {
1542 ss << "pool size is smaller than the crush rule min size";
1543 return false;
1544 } else {
1545 ss << "pool size is bigger than the crush rule max size";
1546 return false;
1547 }
1548 }
1549 }
1550
1551 return false;
1552 }
1553
1554 void encode(bufferlist &bl, uint64_t features) const;
1555 void decode(bufferlist::iterator &blp);
1556 void decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp);
1557 void dump(Formatter *f) const;
1558 void dump_rules(Formatter *f) const;
1559 void dump_rule(int ruleset, Formatter *f) const;
1560 void dump_tunables(Formatter *f) const;
1561 void dump_choose_args(Formatter *f) const;
1562 void list_rules(Formatter *f) const;
c07f9fc5
FG
1563 void list_rules(ostream *ss) const;
1564 void dump_tree(ostream *out,
1565 Formatter *f,
1566 const CrushTreeDumper::name_map_t& ws,
1567 bool show_shadow = false) const;
1568 void dump_tree(ostream *out, Formatter *f) {
1569 dump_tree(out, f, CrushTreeDumper::name_map_t());
1570 }
1571 void dump_tree(Formatter *f,
1572 const CrushTreeDumper::name_map_t& ws) const;
7c673cae
FG
1573 static void generate_test_instances(list<CrushWrapper*>& o);
1574
7c673cae
FG
1575 int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);
1576
1577 static bool is_valid_crush_name(const string& s);
1578 static bool is_valid_crush_loc(CephContext *cct,
1579 const map<string,string>& loc);
1580};
1581WRITE_CLASS_ENCODER_FEATURES(CrushWrapper)
1582
1583#endif