]> git.proxmox.com Git - ceph.git/blame - ceph/src/crush/CrushWrapper.h
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / crush / CrushWrapper.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#ifndef CEPH_CRUSH_WRAPPER_H
5#define CEPH_CRUSH_WRAPPER_H
6
7#include <stdlib.h>
8#include <map>
9#include <set>
10#include <string>
11
12#include <iosfwd>
13
14#include "include/types.h"
15
16extern "C" {
17#include "crush.h"
18#include "hash.h"
19#include "mapper.h"
20#include "builder.h"
21}
22
11fdf7f2 23#include "include/ceph_assert.h"
7c673cae
FG
24#include "include/err.h"
25#include "include/encoding.h"
c07f9fc5 26#include "include/mempool.h"
7c673cae
FG
27
28#include "common/Mutex.h"
29
7c673cae
FG
30namespace ceph {
31 class Formatter;
32}
33
c07f9fc5
FG
34namespace CrushTreeDumper {
35 typedef mempool::osdmap::map<int64_t,string> name_map_t;
36}
37
7c673cae
FG
38WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's
39
11fdf7f2 40inline void encode(const crush_rule_step &s, bufferlist &bl)
7c673cae 41{
11fdf7f2
TL
42 using ceph::encode;
43 encode(s.op, bl);
44 encode(s.arg1, bl);
45 encode(s.arg2, bl);
7c673cae 46}
11fdf7f2 47inline void decode(crush_rule_step &s, bufferlist::const_iterator &p)
7c673cae 48{
11fdf7f2
TL
49 using ceph::decode;
50 decode(s.op, p);
51 decode(s.arg1, p);
52 decode(s.arg2, p);
7c673cae
FG
53}
54
7c673cae
FG
55class CrushWrapper {
56public:
c07f9fc5
FG
57 // magic value used by OSDMap for a "default" fallback choose_args, used if
58 // the choose_arg_map passed to do_rule does not exist. if this also
59 // doesn't exist, fall back to canonical weights.
60 enum {
61 DEFAULT_CHOOSE_ARGS = -1
62 };
63
7c673cae
FG
64 std::map<int32_t, string> type_map; /* bucket/device type names */
65 std::map<int32_t, string> name_map; /* bucket/device names */
66 std::map<int32_t, string> rule_name_map;
d2e6a577 67
7c673cae
FG
68 std::map<int32_t, int32_t> class_map; /* item id -> class id */
69 std::map<int32_t, string> class_name; /* class id -> class name */
70 std::map<string, int32_t> class_rname; /* class name -> class id */
71 std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */
c07f9fc5 72 std::map<int64_t, crush_choose_arg_map> choose_args;
7c673cae
FG
73
74private:
28e407b8 75 struct crush_map *crush = nullptr;
31f18b77
FG
76
77 bool have_uniform_rules = false;
78
7c673cae 79 /* reverse maps */
28e407b8 80 mutable bool have_rmaps = false;
7c673cae
FG
81 mutable std::map<string, int> type_rmap, name_rmap, rule_name_rmap;
82 void build_rmaps() const {
83 if (have_rmaps) return;
84 build_rmap(type_map, type_rmap);
85 build_rmap(name_map, name_rmap);
86 build_rmap(rule_name_map, rule_name_rmap);
87 have_rmaps = true;
88 }
89 void build_rmap(const map<int, string> &f, std::map<string, int> &r) const {
90 r.clear();
91 for (std::map<int, string>::const_iterator p = f.begin(); p != f.end(); ++p)
92 r[p->second] = p->first;
93 }
94
95public:
96 CrushWrapper(const CrushWrapper& other);
97 const CrushWrapper& operator=(const CrushWrapper& other);
98
28e407b8 99 CrushWrapper() {
7c673cae
FG
100 create();
101 }
102 ~CrushWrapper() {
103 if (crush)
104 crush_destroy(crush);
105 choose_args_clear();
106 }
107
108 crush_map *get_crush_map() { return crush; }
109
110 /* building */
111 void create() {
112 if (crush)
113 crush_destroy(crush);
114 crush = crush_create();
115 choose_args_clear();
11fdf7f2 116 ceph_assert(crush);
7c673cae
FG
117 have_rmaps = false;
118
119 set_tunables_default();
120 }
121
3efd9988
FG
122 /**
123 * true if any rule has a rule id != its position in the array
124 *
125 * These indicate "ruleset" IDs that were created by older versions
126 * of Ceph. They are cleaned up in renumber_rules so that eventually
127 * we can remove the code for handling them.
128 */
129 bool has_legacy_rule_ids() const;
31f18b77 130
3efd9988
FG
131 /**
132 * fix rules whose ruleid != ruleset
133 *
134 * These rules were created in older versions of Ceph. The concept
135 * of a ruleset no longer exists.
136 *
137 * Return a map of old ID -> new ID. Caller must update OSDMap
138 * to use new IDs.
139 */
140 std::map<int, int> renumber_rules();
31f18b77 141
c07f9fc5
FG
142 /// true if any buckets that aren't straw2
143 bool has_non_straw2_buckets() const;
144
7c673cae
FG
145 // tunables
146 void set_tunables_argonaut() {
147 crush->choose_local_tries = 2;
148 crush->choose_local_fallback_tries = 5;
149 crush->choose_total_tries = 19;
150 crush->chooseleaf_descend_once = 0;
151 crush->chooseleaf_vary_r = 0;
152 crush->chooseleaf_stable = 0;
153 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
154 }
155 void set_tunables_bobtail() {
156 crush->choose_local_tries = 0;
157 crush->choose_local_fallback_tries = 0;
158 crush->choose_total_tries = 50;
159 crush->chooseleaf_descend_once = 1;
160 crush->chooseleaf_vary_r = 0;
161 crush->chooseleaf_stable = 0;
162 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
163 }
164 void set_tunables_firefly() {
165 crush->choose_local_tries = 0;
166 crush->choose_local_fallback_tries = 0;
167 crush->choose_total_tries = 50;
168 crush->chooseleaf_descend_once = 1;
169 crush->chooseleaf_vary_r = 1;
170 crush->chooseleaf_stable = 0;
171 crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
172 }
173 void set_tunables_hammer() {
174 crush->choose_local_tries = 0;
175 crush->choose_local_fallback_tries = 0;
176 crush->choose_total_tries = 50;
177 crush->chooseleaf_descend_once = 1;
178 crush->chooseleaf_vary_r = 1;
179 crush->chooseleaf_stable = 0;
180 crush->allowed_bucket_algs =
181 (1 << CRUSH_BUCKET_UNIFORM) |
182 (1 << CRUSH_BUCKET_LIST) |
183 (1 << CRUSH_BUCKET_STRAW) |
184 (1 << CRUSH_BUCKET_STRAW2);
185 }
186 void set_tunables_jewel() {
187 crush->choose_local_tries = 0;
188 crush->choose_local_fallback_tries = 0;
189 crush->choose_total_tries = 50;
190 crush->chooseleaf_descend_once = 1;
191 crush->chooseleaf_vary_r = 1;
192 crush->chooseleaf_stable = 1;
193 crush->allowed_bucket_algs =
194 (1 << CRUSH_BUCKET_UNIFORM) |
195 (1 << CRUSH_BUCKET_LIST) |
196 (1 << CRUSH_BUCKET_STRAW) |
197 (1 << CRUSH_BUCKET_STRAW2);
198 }
199
200 void set_tunables_legacy() {
201 set_tunables_argonaut();
202 crush->straw_calc_version = 0;
203 }
204 void set_tunables_optimal() {
205 set_tunables_jewel();
206 crush->straw_calc_version = 1;
207 }
208 void set_tunables_default() {
31f18b77 209 set_tunables_jewel();
7c673cae
FG
210 crush->straw_calc_version = 1;
211 }
212
213 int get_choose_local_tries() const {
214 return crush->choose_local_tries;
215 }
216 void set_choose_local_tries(int n) {
217 crush->choose_local_tries = n;
218 }
219
220 int get_choose_local_fallback_tries() const {
221 return crush->choose_local_fallback_tries;
222 }
223 void set_choose_local_fallback_tries(int n) {
224 crush->choose_local_fallback_tries = n;
225 }
226
227 int get_choose_total_tries() const {
228 return crush->choose_total_tries;
229 }
230 void set_choose_total_tries(int n) {
231 crush->choose_total_tries = n;
232 }
233
234 int get_chooseleaf_descend_once() const {
235 return crush->chooseleaf_descend_once;
236 }
237 void set_chooseleaf_descend_once(int n) {
238 crush->chooseleaf_descend_once = !!n;
239 }
240
241 int get_chooseleaf_vary_r() const {
242 return crush->chooseleaf_vary_r;
243 }
244 void set_chooseleaf_vary_r(int n) {
245 crush->chooseleaf_vary_r = n;
246 }
247
248 int get_chooseleaf_stable() const {
249 return crush->chooseleaf_stable;
250 }
251 void set_chooseleaf_stable(int n) {
252 crush->chooseleaf_stable = n;
253 }
254
255 int get_straw_calc_version() const {
256 return crush->straw_calc_version;
257 }
258 void set_straw_calc_version(int n) {
259 crush->straw_calc_version = n;
260 }
261
262 unsigned get_allowed_bucket_algs() const {
263 return crush->allowed_bucket_algs;
264 }
265 void set_allowed_bucket_algs(unsigned n) {
266 crush->allowed_bucket_algs = n;
267 }
268
269 bool has_argonaut_tunables() const {
270 return
271 crush->choose_local_tries == 2 &&
272 crush->choose_local_fallback_tries == 5 &&
273 crush->choose_total_tries == 19 &&
274 crush->chooseleaf_descend_once == 0 &&
275 crush->chooseleaf_vary_r == 0 &&
276 crush->chooseleaf_stable == 0 &&
277 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
278 }
279 bool has_bobtail_tunables() const {
280 return
281 crush->choose_local_tries == 0 &&
282 crush->choose_local_fallback_tries == 0 &&
283 crush->choose_total_tries == 50 &&
284 crush->chooseleaf_descend_once == 1 &&
285 crush->chooseleaf_vary_r == 0 &&
286 crush->chooseleaf_stable == 0 &&
287 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
288 }
289 bool has_firefly_tunables() const {
290 return
291 crush->choose_local_tries == 0 &&
292 crush->choose_local_fallback_tries == 0 &&
293 crush->choose_total_tries == 50 &&
294 crush->chooseleaf_descend_once == 1 &&
295 crush->chooseleaf_vary_r == 1 &&
296 crush->chooseleaf_stable == 0 &&
297 crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
298 }
299 bool has_hammer_tunables() const {
300 return
301 crush->choose_local_tries == 0 &&
302 crush->choose_local_fallback_tries == 0 &&
303 crush->choose_total_tries == 50 &&
304 crush->chooseleaf_descend_once == 1 &&
305 crush->chooseleaf_vary_r == 1 &&
306 crush->chooseleaf_stable == 0 &&
307 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
308 (1 << CRUSH_BUCKET_LIST) |
309 (1 << CRUSH_BUCKET_STRAW) |
310 (1 << CRUSH_BUCKET_STRAW2));
311 }
312 bool has_jewel_tunables() const {
313 return
314 crush->choose_local_tries == 0 &&
315 crush->choose_local_fallback_tries == 0 &&
316 crush->choose_total_tries == 50 &&
317 crush->chooseleaf_descend_once == 1 &&
318 crush->chooseleaf_vary_r == 1 &&
319 crush->chooseleaf_stable == 1 &&
320 crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) |
321 (1 << CRUSH_BUCKET_LIST) |
322 (1 << CRUSH_BUCKET_STRAW) |
323 (1 << CRUSH_BUCKET_STRAW2));
324 }
325
326 bool has_optimal_tunables() const {
327 return has_jewel_tunables();
328 }
329 bool has_legacy_tunables() const {
330 return has_argonaut_tunables();
331 }
332
333 bool has_nondefault_tunables() const {
334 return
335 (crush->choose_local_tries != 2 ||
336 crush->choose_local_fallback_tries != 5 ||
337 crush->choose_total_tries != 19);
338 }
339 bool has_nondefault_tunables2() const {
340 return
341 crush->chooseleaf_descend_once != 0;
342 }
343 bool has_nondefault_tunables3() const {
344 return
345 crush->chooseleaf_vary_r != 0;
346 }
347 bool has_nondefault_tunables5() const {
348 return
349 crush->chooseleaf_stable != 0;
350 }
351
352 bool has_v2_rules() const;
353 bool has_v3_rules() const;
354 bool has_v4_buckets() const;
355 bool has_v5_rules() const;
31f18b77
FG
356 bool has_choose_args() const; // any choose_args
357 bool has_incompat_choose_args() const; // choose_args that can't be made compat
7c673cae
FG
358
359 bool is_v2_rule(unsigned ruleid) const;
360 bool is_v3_rule(unsigned ruleid) const;
361 bool is_v5_rule(unsigned ruleid) const;
362
363 string get_min_required_version() const {
364 if (has_v5_rules() || has_nondefault_tunables5())
365 return "jewel";
366 else if (has_v4_buckets())
367 return "hammer";
368 else if (has_nondefault_tunables3())
369 return "firefly";
370 else if (has_nondefault_tunables2() || has_nondefault_tunables())
371 return "bobtail";
372 else
373 return "argonaut";
374 }
375
376 // default bucket types
377 unsigned get_default_bucket_alg() const {
378 // in order of preference
379 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2))
380 return CRUSH_BUCKET_STRAW2;
381 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW))
382 return CRUSH_BUCKET_STRAW;
383 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE))
384 return CRUSH_BUCKET_TREE;
385 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST))
386 return CRUSH_BUCKET_LIST;
387 if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM))
388 return CRUSH_BUCKET_UNIFORM;
389 return 0;
390 }
391
392 // bucket types
393 int get_num_type_names() const {
394 return type_map.size();
395 }
31f18b77
FG
396 int get_max_type_id() const {
397 if (type_map.empty())
398 return 0;
399 return type_map.rbegin()->first;
400 }
7c673cae
FG
401 int get_type_id(const string& name) const {
402 build_rmaps();
403 if (type_rmap.count(name))
404 return type_rmap[name];
405 return -1;
406 }
407 const char *get_type_name(int t) const {
408 std::map<int,string>::const_iterator p = type_map.find(t);
409 if (p != type_map.end())
410 return p->second.c_str();
411 return 0;
412 }
413 void set_type_name(int i, const string& name) {
414 type_map[i] = name;
415 if (have_rmaps)
416 type_rmap[name] = i;
417 }
418
419 // item/bucket names
420 bool name_exists(const string& name) const {
421 build_rmaps();
422 return name_rmap.count(name);
423 }
424 bool item_exists(int i) const {
425 return name_map.count(i);
426 }
427 int get_item_id(const string& name) const {
428 build_rmaps();
429 if (name_rmap.count(name))
430 return name_rmap[name];
431 return 0; /* hrm */
432 }
433 const char *get_item_name(int t) const {
434 std::map<int,string>::const_iterator p = name_map.find(t);
435 if (p != name_map.end())
436 return p->second.c_str();
437 return 0;
438 }
439 int set_item_name(int i, const string& name) {
440 if (!is_valid_crush_name(name))
441 return -EINVAL;
442 name_map[i] = name;
443 if (have_rmaps)
444 name_rmap[name] = i;
445 return 0;
446 }
31f18b77
FG
447 void swap_names(int a, int b) {
448 string an = name_map[a];
449 string bn = name_map[b];
450 name_map[a] = bn;
451 name_map[b] = an;
452 if (have_rmaps) {
453 name_rmap[an] = b;
454 name_rmap[bn] = a;
455 }
456 }
7c673cae
FG
457 int split_id_class(int i, int *idout, int *classout) const;
458
459 bool class_exists(const string& name) const {
460 return class_rname.count(name);
461 }
462 const char *get_class_name(int i) const {
224ce89b 463 auto p = class_name.find(i);
7c673cae
FG
464 if (p != class_name.end())
465 return p->second.c_str();
466 return 0;
467 }
468 int get_class_id(const string& name) const {
224ce89b 469 auto p = class_rname.find(name);
7c673cae
FG
470 if (p != class_rname.end())
471 return p->second;
472 else
473 return -EINVAL;
474 }
475 int remove_class_name(const string& name) {
224ce89b 476 auto p = class_rname.find(name);
7c673cae
FG
477 if (p == class_rname.end())
478 return -ENOENT;
479 int class_id = p->second;
224ce89b 480 auto q = class_name.find(class_id);
7c673cae
FG
481 if (q == class_name.end())
482 return -ENOENT;
483 class_rname.erase(name);
484 class_name.erase(class_id);
485 return 0;
486 }
224ce89b 487
224ce89b
WB
488 int32_t _alloc_class_id() const;
489
7c673cae
FG
490 int get_or_create_class_id(const string& name) {
491 int c = get_class_id(name);
492 if (c < 0) {
224ce89b 493 int i = _alloc_class_id();
7c673cae
FG
494 class_name[i] = name;
495 class_rname[name] = i;
496 return i;
497 } else {
498 return c;
499 }
500 }
501
502 const char *get_item_class(int t) const {
503 std::map<int,int>::const_iterator p = class_map.find(t);
504 if (p == class_map.end())
505 return 0;
506 return get_class_name(p->second);
507 }
81eedcae
TL
508 int get_item_class_id(int t) const {
509 auto p = class_map.find(t);
510 if (p == class_map.end())
511 return -ENOENT;
512 return p->second;
513 }
7c673cae
FG
514 int set_item_class(int i, const string& name) {
515 if (!is_valid_crush_name(name))
516 return -EINVAL;
517 class_map[i] = get_or_create_class_id(name);
518 return 0;
519 }
520 int set_item_class(int i, int c) {
521 class_map[i] = c;
522 return c;
523 }
224ce89b 524 void get_devices_by_class(const string &name, set<int> *devices) const {
11fdf7f2 525 ceph_assert(devices);
224ce89b
WB
526 devices->clear();
527 if (!class_exists(name)) {
528 return;
529 }
530 auto cid = get_class_id(name);
531 for (auto& p : class_map) {
532 if (p.first >= 0 && p.second == cid) {
533 devices->insert(p.first);
534 }
535 }
536 }
537 void class_remove_item(int i) {
538 auto it = class_map.find(i);
539 if (it == class_map.end()) {
540 return;
541 }
542 class_map.erase(it);
543 }
7c673cae
FG
544 int can_rename_item(const string& srcname,
545 const string& dstname,
546 ostream *ss) const;
547 int rename_item(const string& srcname,
548 const string& dstname,
549 ostream *ss);
550 int can_rename_bucket(const string& srcname,
551 const string& dstname,
552 ostream *ss) const;
553 int rename_bucket(const string& srcname,
554 const string& dstname,
555 ostream *ss);
556
557 // rule names
b5b8bbf5
FG
558 int rename_rule(const string& srcname,
559 const string& dstname,
560 ostream *ss);
7c673cae
FG
561 bool rule_exists(string name) const {
562 build_rmaps();
563 return rule_name_rmap.count(name);
564 }
565 int get_rule_id(string name) const {
566 build_rmaps();
567 if (rule_name_rmap.count(name))
568 return rule_name_rmap[name];
569 return -ENOENT;
570 }
571 const char *get_rule_name(int t) const {
572 std::map<int,string>::const_iterator p = rule_name_map.find(t);
573 if (p != rule_name_map.end())
574 return p->second.c_str();
575 return 0;
576 }
577 void set_rule_name(int i, const string& name) {
578 rule_name_map[i] = name;
579 if (have_rmaps)
580 rule_name_rmap[name] = i;
581 }
c07f9fc5
FG
582 bool is_shadow_item(int id) const {
583 const char *name = get_item_name(id);
584 return name && !is_valid_crush_name(name);
585 }
7c673cae
FG
586
587
588 /**
589 * find tree nodes referenced by rules by a 'take' command
590 *
591 * Note that these may not be parentless roots.
592 */
3efd9988 593 void find_takes(set<int> *roots) const;
28e407b8 594 void find_takes_by_rule(int rule, set<int> *roots) const;
7c673cae
FG
595
596 /**
597 * find tree roots
598 *
599 * These are parentless nodes in the map.
600 */
3efd9988 601 void find_roots(set<int> *roots) const;
7c673cae 602
c07f9fc5
FG
603
604 /**
605 * find tree roots that contain shadow (device class) items only
606 */
3efd9988 607 void find_shadow_roots(set<int> *roots) const {
c07f9fc5 608 set<int> all;
3efd9988 609 find_roots(&all);
c07f9fc5
FG
610 for (auto& p: all) {
611 if (is_shadow_item(p)) {
3efd9988 612 roots->insert(p);
c07f9fc5
FG
613 }
614 }
615 }
616
224ce89b
WB
617 /**
618 * find tree roots that are not shadow (device class) items
619 *
620 * These are parentless nodes in the map that are not shadow
621 * items for device classes.
622 */
3efd9988 623 void find_nonshadow_roots(set<int> *roots) const {
c07f9fc5 624 set<int> all;
3efd9988 625 find_roots(&all);
c07f9fc5
FG
626 for (auto& p: all) {
627 if (!is_shadow_item(p)) {
3efd9988 628 roots->insert(p);
c07f9fc5
FG
629 }
630 }
631 }
224ce89b 632
7c673cae
FG
633 /**
634 * see if an item is contained within a subtree
635 *
636 * @param root haystack
637 * @param item needle
638 * @return true if the item is located beneath the given node
639 */
640 bool subtree_contains(int root, int item) const;
641
642private:
643 /**
644 * search for an item in any bucket
645 *
646 * @param i item
647 * @return true if present
648 */
649 bool _search_item_exists(int i) const;
11fdf7f2 650 bool is_parent_of(int child, int p) const;
7c673cae
FG
651public:
652
653 /**
654 * see if item is located where we think it is
655 *
656 * This verifies that the given item is located at a particular
657 * location in the hierarchy. However, that check is imprecise; we
658 * are actually verifying that the most specific location key/value
659 * is correct. For example, if loc specifies that rack=foo and
660 * host=bar, it will verify that host=bar is correct; any placement
661 * above that level in the hierarchy is ignored. This matches the
662 * semantics for insert_item().
663 *
664 * @param cct cct
665 * @param item item id
666 * @param loc location to check (map of type to bucket names)
667 * @param weight optional pointer to weight of item at that location
668 * @return true if item is at specified location
669 */
670 bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, int *iweight);
671 bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, float *weight) {
672 int iweight;
673 bool ret = check_item_loc(cct, item, loc, &iweight);
674 if (weight)
675 *weight = (float)iweight / (float)0x10000;
676 return ret;
677 }
678
679
680 /**
681 * returns the (type, name) of the parent bucket of id
682 *
683 * FIXME: ambiguous for items that occur multiple times in the map
684 */
11fdf7f2 685 pair<string,string> get_immediate_parent(int id, int *ret = NULL) const;
c07f9fc5 686
7c673cae
FG
687 int get_immediate_parent_id(int id, int *parent) const;
688
31f18b77
FG
689 /**
690 * return ancestor of the given type, or 0 if none
28e407b8 691 * can pass in a specific crush **rule** to return ancestor from that rule only
31f18b77
FG
692 * (parent is always a bucket and thus <0)
693 */
28e407b8 694 int get_parent_of_type(int id, int type, int rule = -1) const;
31f18b77 695
7c673cae
FG
696 /**
697 * get the fully qualified location of a device by successively finding
698 * parents beginning at ID and ending at highest type number specified in
699 * the CRUSH map which assumes that if device foo is under device bar, the
700 * type_id of foo < bar where type_id is the integer specified in the CRUSH map
701 *
702 * returns the location in the form of (type=foo) where type is a type of bucket
703 * specified in the CRUSH map and foo is a name specified in the CRUSH map
704 */
11fdf7f2
TL
705 map<string, string> get_full_location(int id) const;
706
707 /**
708 * return location map for a item, by name
709 */
710 int get_full_location(
711 const string& name,
712 std::map<string,string> *ploc);
7c673cae
FG
713
714 /*
715 * identical to get_full_location(int id) although it returns the type/name
716 * pairs in the order they occur in the hierarchy.
717 *
718 * returns -ENOENT if id is not found.
719 */
11fdf7f2 720 int get_full_location_ordered(int id, vector<pair<string, string> >& path) const;
7c673cae 721
31f18b77
FG
722 /*
723 * identical to get_full_location_ordered(int id, vector<pair<string, string> >& path),
724 * although it returns a concatenated string with the type/name pairs in descending
725 * hierarchical order with format key1=val1,key2=val2.
726 *
727 * returns the location in descending hierarchy as a string.
728 */
11fdf7f2 729 string get_full_location_ordered_string(int id) const;
31f18b77 730
7c673cae
FG
731 /**
732 * returns (type_id, type) of all parent buckets between id and
11fdf7f2 733 * default, can be used to check for anomalous CRUSH maps
7c673cae 734 */
11fdf7f2 735 map<int, string> get_parent_hierarchy(int id) const;
7c673cae
FG
736
737 /**
738 * enumerate immediate children of given node
739 *
740 * @param id parent bucket or device id
741 * @return number of items, or error
742 */
11fdf7f2
TL
743 int get_children(int id, list<int> *children) const;
744 /**
745 * enumerate all children of given node
746 *
747 * @param id parent bucket or device id
748 * @return number of items, or error
749 */
750 int get_all_children(int id, set<int> *children) const;
28e407b8
AA
751 void get_children_of_type(int id,
752 int type,
11fdf7f2 753 vector<int> *children,
28e407b8 754 bool exclude_shadow = true) const;
11fdf7f2
TL
755 /**
756 * enumerate all subtrees by type
757 */
758 void get_subtree_of_type(int type, vector<int> *subtrees);
7c673cae 759
a8e16298 760
94b18763 761 /**
a8e16298
TL
762 * verify upmapping results.
763 * return 0 on success or a negative errno on error.
764 */
765 int verify_upmap(CephContext *cct,
766 int rule_id,
767 int pool_size,
768 const vector<int>& up);
94b18763 769
31f18b77
FG
770 /**
771 * enumerate leaves(devices) of given node
772 *
773 * @param name parent bucket name
774 * @return 0 on success or a negative errno on error.
775 */
11fdf7f2
TL
776 int get_leaves(const string &name, set<int> *leaves) const;
777
778private:
779 int _get_leaves(int id, list<int> *leaves) const; // worker
31f18b77 780
11fdf7f2 781public:
7c673cae
FG
782 /**
783 * insert an item into the map at a specific position
784 *
785 * Add an item as a specific location of the hierarchy.
786 * Specifically, we look for the most specific location constraint
787 * for which a bucket already exists, and then create intervening
788 * buckets beneath that in order to place the item.
789 *
790 * Note that any location specifiers *above* the most specific match
791 * are ignored. For example, if we specify that osd.12 goes in
792 * host=foo, rack=bar, and row=baz, and rack=bar is the most
793 * specific match, we will create host=foo beneath that point and
794 * put osd.12 inside it. However, we will not verify that rack=bar
795 * is beneath row=baz or move it.
796 *
797 * In short, we will build out a hierarchy, and move leaves around,
798 * but not adjust the hierarchy's internal structure. Yet.
799 *
800 * If the item is already present in the map, we will return EEXIST.
801 * If the location key/value pairs are nonsensical
802 * (rack=nameofdevice), or location specifies that do not attach us
803 * to any existing part of the hierarchy, we will return EINVAL.
804 *
805 * @param cct cct
806 * @param id item id
807 * @param weight item weight
808 * @param name item name
809 * @param loc location (map of type to bucket names)
11fdf7f2 810 * @param init_weight_sets initialize weight-set weights to weight (vs 0)
7c673cae
FG
811 * @return 0 for success, negative on error
812 */
11fdf7f2
TL
813 int insert_item(CephContext *cct, int id, float weight, string name,
814 const map<string,string>& loc,
815 bool init_weight_sets=true);
7c673cae
FG
816
817 /**
818 * move a bucket in the hierarchy to the given location
819 *
820 * This has the same location and ancestor creation behavior as
821 * insert_item(), but will relocate the specified existing bucket.
822 *
823 * @param cct cct
824 * @param id bucket id
825 * @param loc location (map of type to bucket names)
826 * @return 0 for success, negative on error
827 */
828 int move_bucket(CephContext *cct, int id, const map<string,string>& loc);
829
31f18b77
FG
830 /**
831 * swap bucket contents of two buckets without touching bucket ids
832 *
833 * @param cct cct
834 * @param src bucket a
835 * @param dst bucket b
836 * @return 0 for success, negative on error
837 */
838 int swap_bucket(CephContext *cct, int src, int dst);
839
7c673cae
FG
840 /**
841 * add a link to an existing bucket in the hierarchy to the new location
842 *
843 * This has the same location and ancestor creation behavior as
844 * insert_item(), but will add a new link to the specified existing
845 * bucket.
846 *
847 * @param cct cct
848 * @param id bucket id
849 * @param loc location (map of type to bucket names)
850 * @return 0 for success, negative on error
851 */
852 int link_bucket(CephContext *cct, int id, const map<string,string>& loc);
853
854 /**
855 * add or update an item's position in the map
856 *
857 * This is analogous to insert_item, except we will move an item if
858 * it is already present.
859 *
860 * @param cct cct
861 * @param id item id
862 * @param weight item weight
863 * @param name item name
864 * @param loc location (map of type to bucket names)
865 * @return 0 for no change, 1 for successful change, negative on error
866 */
867 int update_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc);
868
869 /**
870 * create or move an item, but do not adjust its weight if it already exists
871 *
872 * @param cct cct
873 * @param item item id
874 * @param weight initial item weight (if we need to create it)
875 * @param name item name
876 * @param loc location (map of type to bucket names)
11fdf7f2 877 * @param init_weight_sets initialize weight-set values to weight (vs 0)
7c673cae
FG
878 * @return 0 for no change, 1 for successful change, negative on error
879 */
880 int create_or_move_item(CephContext *cct, int item, float weight, string name,
11fdf7f2
TL
881 const map<string,string>& loc,
882 bool init_weight_sets=true);
7c673cae
FG
883
884 /**
885 * remove all instances of an item from the map
886 *
887 * @param cct cct
888 * @param id item id to remove
889 * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty)
890 * @return 0 on success, negative on error
891 */
892 int remove_item(CephContext *cct, int id, bool unlink_only);
893
894 /**
895 * recursively remove buckets starting at item and stop removing
896 * when a bucket is in use.
897 *
898 * @param item id to remove
7c673cae
FG
899 * @return 0 on success, negative on error
900 */
11fdf7f2 901 int remove_root(CephContext *cct, int item);
7c673cae
FG
902
903 /**
904 * remove all instances of an item nested beneath a certain point from the map
905 *
906 * @param cct cct
907 * @param id item id to remove
908 * @param ancestor ancestor item id under which to search for id
909 * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty)
910 * @return 0 on success, negative on error
911 */
912private:
913 bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only);
914 int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
915 bool _bucket_is_in_use(int id);
916public:
917 int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only);
918
919 /**
920 * calculate the locality/distance from a given id to a crush location map
921 *
922 * Specifically, we look for the lowest-valued type for which the
923 * location of id matches that described in loc.
924 *
925 * @param cct cct
926 * @param id the existing id in the map
927 * @param loc a set of key=value pairs describing a location in the hierarchy
928 */
929 int get_common_ancestor_distance(CephContext *cct, int id,
11fdf7f2 930 const std::multimap<string,string>& loc) const;
7c673cae
FG
931
932 /**
933 * parse a set of key/value pairs out of a string vector
934 *
935 * These are used to describe a location in the CRUSH hierarchy.
936 *
937 * @param args list of strings (each key= or key=value)
938 * @param ploc pointer to a resulting location map or multimap
939 */
940 static int parse_loc_map(const std::vector<string>& args,
941 std::map<string,string> *ploc);
942 static int parse_loc_multimap(const std::vector<string>& args,
943 std::multimap<string,string> *ploc);
944
11fdf7f2 945
7c673cae
FG
946 /**
947 * get an item's weight
948 *
949 * Will return the weight for the first instance it finds.
950 *
951 * @param id item id to check
952 * @return weight of item
953 */
954 int get_item_weight(int id) const;
955 float get_item_weightf(int id) const {
956 return (float)get_item_weight(id) / (float)0x10000;
957 }
958 int get_item_weight_in_loc(int id, const map<string,string> &loc);
959 float get_item_weightf_in_loc(int id, const map<string,string> &loc) {
960 return (float)get_item_weight_in_loc(id, loc) / (float)0x10000;
961 }
962
224ce89b
WB
/// Check that a float weight, once scaled by 0x10000 and truncated, fits
/// in a non-negative int.
///
/// The check is done in floating point. Converting an out-of-range value
/// (negative beyond -1, NaN, infinity, or >= 2^64) to an unsigned integer
/// is undefined behaviour, so the comparison must happen before any
/// integer conversion. Values whose scaled form truncates to 0 (including
/// tiny negatives) are still accepted, as before.
///
/// @return 0 if the weight is representable, -EOVERFLOW otherwise
int validate_weightf(float weight) {
  const double scaled = static_cast<double>(weight) * 0x10000;
  // exclusive upper bound: INT_MAX + 1
  const double limit =
    static_cast<double>(std::numeric_limits<int>::max()) + 1.0;
  // written positively so NaN (all comparisons false) is rejected too
  if (!(scaled > -1.0 && scaled < limit)) {
    return -EOVERFLOW;
  }
  return 0;
}
11fdf7f2
TL
970 int adjust_item_weight(CephContext *cct, int id, int weight,
971 bool update_weight_sets=true);
972 int adjust_item_weightf(CephContext *cct, int id, float weight,
973 bool update_weight_sets=true) {
224ce89b
WB
974 int r = validate_weightf(weight);
975 if (r < 0) {
976 return r;
977 }
11fdf7f2
TL
978 return adjust_item_weight(cct, id, (int)(weight * (float)0x10000),
979 update_weight_sets);
980 }
981 int adjust_item_weight_in_bucket(CephContext *cct, int id, int weight,
982 int bucket_id,
983 bool update_weight_sets);
984 int adjust_item_weight_in_loc(CephContext *cct, int id, int weight,
985 const map<string,string>& loc,
986 bool update_weight_sets=true);
987 int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight,
988 const map<string,string>& loc,
989 bool update_weight_sets=true) {
224ce89b
WB
990 int r = validate_weightf(weight);
991 if (r < 0) {
992 return r;
993 }
11fdf7f2
TL
994 return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000),
995 loc, update_weight_sets);
7c673cae
FG
996 }
997 void reweight(CephContext *cct);
f64942e4
AA
998 void reweight_bucket(crush_bucket *b,
999 crush_choose_arg_map& arg_map,
1000 vector<uint32_t> *weightv);
7c673cae 1001
11fdf7f2
TL
1002 int adjust_subtree_weight(CephContext *cct, int id, int weight,
1003 bool update_weight_sets=true);
1004 int adjust_subtree_weightf(CephContext *cct, int id, float weight,
1005 bool update_weight_sets=true) {
224ce89b
WB
1006 int r = validate_weightf(weight);
1007 if (r < 0) {
1008 return r;
1009 }
11fdf7f2
TL
1010 return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000),
1011 update_weight_sets);
7c673cae
FG
1012 }
1013
1014 /// check if item id is present in the map hierarchy
1015 bool check_item_present(int id) const;
1016
1017
1018 /*** devices ***/
1019 int get_max_devices() const {
1020 if (!crush) return 0;
1021 return crush->max_devices;
1022 }
1023
1024
1025 /*** rules ***/
1026private:
1027 crush_rule *get_rule(unsigned ruleno) const {
1028 if (!crush) return (crush_rule *)(-ENOENT);
1029 if (ruleno >= crush->max_rules)
1030 return 0;
1031 return crush->rules[ruleno];
1032 }
1033 crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const {
1034 crush_rule *n = get_rule(ruleno);
1035 if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL);
1036 if (step >= n->len) return (crush_rule_step *)(-EINVAL);
1037 return &n->steps[step];
1038 }
1039
1040public:
1041 /* accessors */
1042 int get_max_rules() const {
1043 if (!crush) return 0;
1044 return crush->max_rules;
1045 }
1046 bool rule_exists(unsigned ruleno) const {
1047 if (!crush) return false;
1048 if (ruleno < crush->max_rules &&
1049 crush->rules[ruleno] != NULL)
1050 return true;
1051 return false;
1052 }
3efd9988
FG
1053 bool rule_has_take(unsigned ruleno, int take) const {
1054 if (!crush) return false;
1055 crush_rule *rule = get_rule(ruleno);
1056 for (unsigned i = 0; i < rule->len; ++i) {
1057 if (rule->steps[i].op == CRUSH_RULE_TAKE &&
1058 rule->steps[i].arg1 == take) {
1059 return true;
1060 }
1061 }
1062 return false;
1063 }
7c673cae
FG
1064 int get_rule_len(unsigned ruleno) const {
1065 crush_rule *r = get_rule(ruleno);
1066 if (IS_ERR(r)) return PTR_ERR(r);
1067 return r->len;
1068 }
1069 int get_rule_mask_ruleset(unsigned ruleno) const {
1070 crush_rule *r = get_rule(ruleno);
1071 if (IS_ERR(r)) return -1;
1072 return r->mask.ruleset;
1073 }
1074 int get_rule_mask_type(unsigned ruleno) const {
1075 crush_rule *r = get_rule(ruleno);
1076 if (IS_ERR(r)) return -1;
1077 return r->mask.type;
1078 }
1079 int get_rule_mask_min_size(unsigned ruleno) const {
1080 crush_rule *r = get_rule(ruleno);
1081 if (IS_ERR(r)) return -1;
1082 return r->mask.min_size;
1083 }
1084 int get_rule_mask_max_size(unsigned ruleno) const {
1085 crush_rule *r = get_rule(ruleno);
1086 if (IS_ERR(r)) return -1;
1087 return r->mask.max_size;
1088 }
1089 int get_rule_op(unsigned ruleno, unsigned step) const {
1090 crush_rule_step *s = get_rule_step(ruleno, step);
1091 if (IS_ERR(s)) return PTR_ERR(s);
1092 return s->op;
1093 }
1094 int get_rule_arg1(unsigned ruleno, unsigned step) const {
1095 crush_rule_step *s = get_rule_step(ruleno, step);
1096 if (IS_ERR(s)) return PTR_ERR(s);
1097 return s->arg1;
1098 }
1099 int get_rule_arg2(unsigned ruleno, unsigned step) const {
1100 crush_rule_step *s = get_rule_step(ruleno, step);
1101 if (IS_ERR(s)) return PTR_ERR(s);
1102 return s->arg2;
1103 }
1104
3efd9988
FG
1105private:
1106 float _get_take_weight_osd_map(int root, map<int,float> *pmap) const;
1107 void _normalize_weight_map(float sum, const map<int,float>& m,
1108 map<int,float> *pmap) const;
1109
1110public:
7c673cae
FG
1111 /**
1112 * calculate a map of osds to weights for a given rule
1113 *
1114 * Generate a map of which OSDs get how much relative weight for a
1115 * given rule.
1116 *
1117 * @param ruleno [in] rule id
1118 * @param pmap [out] map of osd to weight
1119 * @return 0 for success, or negative error code
1120 */
3efd9988
FG
1121 int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap) const;
1122
1123 /**
1124 * calculate a map of osds to weights for a given starting root
1125 *
1126 * Generate a map of which OSDs get how much relative weight for a
1127 * given starting root
1128 *
1129 * @param root node
1130 * @param pmap [out] map of osd to weight
1131 * @return 0 for success, or negative error code
1132 */
1133 int get_take_weight_osd_map(int root, map<int,float> *pmap) const;
7c673cae
FG
1134
1135 /* modifiers */
c07f9fc5
FG
1136
1137 int add_rule(int ruleno, int len, int type, int minsize, int maxsize) {
7c673cae 1138 if (!crush) return -ENOENT;
c07f9fc5 1139 crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize);
11fdf7f2 1140 ceph_assert(n);
7c673cae
FG
1141 ruleno = crush_add_rule(crush, n, ruleno);
1142 return ruleno;
1143 }
1144 int set_rule_mask_max_size(unsigned ruleno, int max_size) {
1145 crush_rule *r = get_rule(ruleno);
1146 if (IS_ERR(r)) return -1;
1147 return r->mask.max_size = max_size;
1148 }
1149 int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) {
1150 if (!crush) return -ENOENT;
1151 crush_rule *n = get_rule(ruleno);
1152 if (!n) return -1;
1153 crush_rule_set_step(n, step, op, arg1, arg2);
1154 return 0;
1155 }
1156 int set_rule_step_take(unsigned ruleno, unsigned step, int val) {
1157 return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0);
1158 }
1159 int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) {
1160 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0);
1161 }
1162 int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) {
1163 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0);
1164 }
1165 int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) {
1166 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0);
1167 }
1168 int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) {
1169 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0);
1170 }
1171 int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) {
1172 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0);
1173 }
1174 int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) {
1175 return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0);
1176 }
1177 int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) {
1178 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type);
1179 }
1180 int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) {
1181 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type);
1182 }
1183 int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) {
1184 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type);
1185 }
1186 int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) {
1187 return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type);
1188 }
1189 int set_rule_step_emit(unsigned ruleno, unsigned step) {
1190 return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
1191 }
1192
31f18b77
FG
1193 int add_simple_rule(
1194 string name, string root_name, string failure_domain_type,
224ce89b 1195 string device_class,
31f18b77
FG
1196 string mode, int rule_type, ostream *err = 0);
1197
7c673cae 1198 /**
31f18b77 1199 * @param rno rule[set] id to use, -1 to pick the lowest available
7c673cae 1200 */
31f18b77
FG
1201 int add_simple_rule_at(
1202 string name, string root_name,
224ce89b 1203 string failure_domain_type, string device_class, string mode,
31f18b77 1204 int rule_type, int rno, ostream *err = 0);
7c673cae
FG
1205
1206 int remove_rule(int ruleno);
1207
1208
1209 /** buckets **/
7c673cae
FG
1210 const crush_bucket *get_bucket(int id) const {
1211 if (!crush)
1212 return (crush_bucket *)(-EINVAL);
1213 unsigned int pos = (unsigned int)(-1 - id);
1214 unsigned int max_buckets = crush->max_buckets;
1215 if (pos >= max_buckets)
1216 return (crush_bucket *)(-ENOENT);
1217 crush_bucket *ret = crush->buckets[pos];
1218 if (ret == NULL)
1219 return (crush_bucket *)(-ENOENT);
1220 return ret;
1221 }
c07f9fc5 1222private:
7c673cae
FG
1223 crush_bucket *get_bucket(int id) {
1224 if (!crush)
1225 return (crush_bucket *)(-EINVAL);
1226 unsigned int pos = (unsigned int)(-1 - id);
1227 unsigned int max_buckets = crush->max_buckets;
1228 if (pos >= max_buckets)
1229 return (crush_bucket *)(-ENOENT);
1230 crush_bucket *ret = crush->buckets[pos];
1231 if (ret == NULL)
1232 return (crush_bucket *)(-ENOENT);
1233 return ret;
1234 }
1235 /**
1236 * detach a bucket from its parent and adjust the parent weight
1237 *
1238 * returns the weight of the detached bucket
1239 **/
c07f9fc5 1240 int detach_bucket(CephContext *cct, int item);
7c673cae 1241
f64942e4
AA
1242 int get_new_bucket_id();
1243
7c673cae
FG
1244public:
1245 int get_max_buckets() const {
1246 if (!crush) return -EINVAL;
1247 return crush->max_buckets;
1248 }
1249 int get_next_bucket_id() const {
1250 if (!crush) return -EINVAL;
1251 return crush_get_next_bucket_id(crush);
1252 }
1253 bool bucket_exists(int id) const {
1254 const crush_bucket *b = get_bucket(id);
1255 if (IS_ERR(b))
1256 return false;
1257 return true;
1258 }
1259 int get_bucket_weight(int id) const {
1260 const crush_bucket *b = get_bucket(id);
1261 if (IS_ERR(b)) return PTR_ERR(b);
1262 return b->weight;
1263 }
1264 float get_bucket_weightf(int id) const {
1265 const crush_bucket *b = get_bucket(id);
1266 if (IS_ERR(b)) return 0;
1267 return b->weight / (float)0x10000;
1268 }
1269 int get_bucket_type(int id) const {
1270 const crush_bucket *b = get_bucket(id);
1271 if (IS_ERR(b)) return PTR_ERR(b);
1272 return b->type;
1273 }
1274 int get_bucket_alg(int id) const {
1275 const crush_bucket *b = get_bucket(id);
1276 if (IS_ERR(b)) return PTR_ERR(b);
1277 return b->alg;
1278 }
1279 int get_bucket_hash(int id) const {
1280 const crush_bucket *b = get_bucket(id);
1281 if (IS_ERR(b)) return PTR_ERR(b);
1282 return b->hash;
1283 }
1284 int get_bucket_size(int id) const {
1285 const crush_bucket *b = get_bucket(id);
1286 if (IS_ERR(b)) return PTR_ERR(b);
1287 return b->size;
1288 }
1289 int get_bucket_item(int id, int pos) const {
1290 const crush_bucket *b = get_bucket(id);
1291 if (IS_ERR(b)) return PTR_ERR(b);
1292 if ((__u32)pos >= b->size)
1293 return PTR_ERR(b);
1294 return b->items[pos];
1295 }
1296 int get_bucket_item_weight(int id, int pos) const {
1297 const crush_bucket *b = get_bucket(id);
1298 if (IS_ERR(b)) return PTR_ERR(b);
1299 return crush_get_bucket_item_weight(b, pos);
1300 }
1301 float get_bucket_item_weightf(int id, int pos) const {
1302 const crush_bucket *b = get_bucket(id);
1303 if (IS_ERR(b)) return 0;
1304 return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000;
1305 }
1306
1307 /* modifiers */
1308 int add_bucket(int bucketno, int alg, int hash, int type, int size,
c07f9fc5 1309 int *items, int *weights, int *idout);
31f18b77
FG
1310 int bucket_add_item(crush_bucket *bucket, int item, int weight);
1311 int bucket_remove_item(struct crush_bucket *bucket, int item);
11fdf7f2
TL
1312 int bucket_adjust_item_weight(
1313 CephContext *cct, struct crush_bucket *bucket, int item, int weight,
1314 bool adjust_weight_sets);
31f18b77 1315
7c673cae 1316 void finalize() {
11fdf7f2 1317 ceph_assert(crush);
7c673cae 1318 crush_finalize(crush);
3a9019d9
FG
1319 if (!name_map.empty() &&
1320 name_map.rbegin()->first >= crush->max_devices) {
1321 crush->max_devices = name_map.rbegin()->first + 1;
1322 }
3efd9988 1323 have_uniform_rules = !has_legacy_rule_ids();
7c673cae 1324 }
3efd9988 1325 int bucket_set_alg(int id, int alg);
7c673cae 1326
224ce89b 1327 int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
c07f9fc5 1328 int remove_device_class(CephContext *cct, int id, ostream *ss);
d2e6a577
FG
1329 int device_class_clone(
1330 int original, int device_class,
1331 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
1332 const std::set<int32_t>& used_ids,
35e4c445
FG
1333 int *clone,
1334 map<int,map<int,vector<int>>> *cmap_item_weight);
11fdf7f2 1335 bool class_is_in_use(int class_id, ostream *ss = nullptr);
35e4c445 1336 int rename_class(const string& srcname, const string& dstname);
d2e6a577
FG
1337 int populate_classes(
1338 const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
b5b8bbf5 1339 int get_rules_by_class(const string &class_name, set<int> *rules);
3efd9988 1340 int get_rules_by_osd(int osd, set<int> *rules);
d2e6a577
FG
1341 bool _class_is_dead(int class_id);
1342 void cleanup_dead_classes();
11fdf7f2 1343 int rebuild_roots_with_classes(CephContext *cct);
7c673cae 1344 /* remove unused roots generated for class devices */
11fdf7f2 1345 int trim_roots_with_class(CephContext *cct);
7c673cae 1346
f64942e4
AA
1347 int reclassify(
1348 CephContext *cct,
1349 ostream& out,
1350 const map<string,string>& classify_root,
1351 const map<string,pair<string,string>>& classify_bucket
1352 );
1353
1354 int set_subtree_class(const string& name, const string& class_name);
1355
7c673cae
FG
1356 void start_choose_profile() {
1357 free(crush->choose_tries);
1358 /*
1359 * the original choose_total_tries value was off by one (it
1360 * counted "retries" and not "tries"). add one to alloc.
1361 */
c07f9fc5
FG
1362 crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries),
1363 (crush->choose_total_tries + 1));
7c673cae
FG
1364 memset(crush->choose_tries, 0,
1365 sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
1366 }
1367 void stop_choose_profile() {
1368 free(crush->choose_tries);
1369 crush->choose_tries = 0;
1370 }
1371
1372 int get_choose_profile(__u32 **vec) {
1373 if (crush->choose_tries) {
1374 *vec = crush->choose_tries;
1375 return crush->choose_total_tries;
1376 }
1377 return 0;
1378 }
1379
1380
1381 void set_max_devices(int m) {
1382 crush->max_devices = m;
1383 }
1384
1385 int find_rule(int ruleset, int type, int size) const {
1386 if (!crush) return -1;
181888fb
FG
1387 if (have_uniform_rules &&
1388 ruleset < (int)crush->max_rules &&
1389 crush->rules[ruleset] &&
1390 crush->rules[ruleset]->mask.type == type &&
1391 crush->rules[ruleset]->mask.min_size <= size &&
1392 crush->rules[ruleset]->mask.max_size >= size) {
1393 return ruleset;
31f18b77 1394 }
181888fb 1395 return crush_find_rule(crush, ruleset, type, size);
7c673cae
FG
1396 }
1397
d2e6a577 1398 bool ruleset_exists(const int ruleset) const {
7c673cae
FG
1399 for (size_t i = 0; i < crush->max_rules; ++i) {
1400 if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) {
1401 return true;
1402 }
1403 }
1404
1405 return false;
1406 }
1407
1408 /**
1409 * Return the lowest numbered ruleset of type `type`
1410 *
3efd9988 1411 * @returns a ruleset ID, or -1 if no matching rules found.
7c673cae
FG
1412 */
1413 int find_first_ruleset(int type) const {
1414 int result = -1;
1415
1416 for (size_t i = 0; i < crush->max_rules; ++i) {
1417 if (crush->rules[i]
1418 && crush->rules[i]->mask.type == type
1419 && (crush->rules[i]->mask.ruleset < result || result == -1)) {
1420 result = crush->rules[i]->mask.ruleset;
1421 }
1422 }
1423
1424 return result;
1425 }
1426
c07f9fc5
FG
1427 bool have_choose_args(int64_t choose_args_index) const {
1428 return choose_args.count(choose_args_index);
1429 }
1430
1431 crush_choose_arg_map choose_args_get_with_fallback(
1432 int64_t choose_args_index) const {
1433 auto i = choose_args.find(choose_args_index);
1434 if (i == choose_args.end()) {
1435 i = choose_args.find(DEFAULT_CHOOSE_ARGS);
1436 }
1437 if (i == choose_args.end()) {
1438 crush_choose_arg_map arg_map;
1439 arg_map.args = NULL;
1440 arg_map.size = 0;
1441 return arg_map;
1442 } else {
1443 return i->second;
1444 }
1445 }
1446 crush_choose_arg_map choose_args_get(int64_t choose_args_index) const {
7c673cae
FG
1447 auto i = choose_args.find(choose_args_index);
1448 if (i == choose_args.end()) {
1449 crush_choose_arg_map arg_map;
1450 arg_map.args = NULL;
1451 arg_map.size = 0;
1452 return arg_map;
1453 } else {
1454 return i->second;
1455 }
1456 }
1457
1458 void destroy_choose_args(crush_choose_arg_map arg_map) {
1459 for (__u32 i = 0; i < arg_map.size; i++) {
1460 crush_choose_arg *arg = &arg_map.args[i];
28e407b8 1461 for (__u32 j = 0; j < arg->weight_set_positions; j++) {
7c673cae
FG
1462 crush_weight_set *weight_set = &arg->weight_set[j];
1463 free(weight_set->weights);
1464 }
1465 if (arg->weight_set)
1466 free(arg->weight_set);
1467 if (arg->ids)
1468 free(arg->ids);
1469 }
1470 free(arg_map.args);
1471 }
c07f9fc5 1472
11fdf7f2 1473 bool create_choose_args(int64_t id, int positions) {
c07f9fc5 1474 if (choose_args.count(id))
11fdf7f2
TL
1475 return false;
1476 ceph_assert(positions);
c07f9fc5 1477 auto &cmap = choose_args[id];
11fdf7f2
TL
1478 cmap.args = static_cast<crush_choose_arg*>(calloc(sizeof(crush_choose_arg),
1479 crush->max_buckets));
c07f9fc5
FG
1480 cmap.size = crush->max_buckets;
1481 for (int bidx=0; bidx < crush->max_buckets; ++bidx) {
1482 crush_bucket *b = crush->buckets[bidx];
1483 auto &carg = cmap.args[bidx];
1484 carg.ids = NULL;
1485 carg.ids_size = 0;
1486 if (b && b->alg == CRUSH_BUCKET_STRAW2) {
11fdf7f2 1487 crush_bucket_straw2 *sb = reinterpret_cast<crush_bucket_straw2*>(b);
28e407b8 1488 carg.weight_set_positions = positions;
11fdf7f2
TL
1489 carg.weight_set = static_cast<crush_weight_set*>(calloc(sizeof(crush_weight_set),
1490 carg.weight_set_positions));
c07f9fc5
FG
1491 // initialize with canonical weights
1492 for (int pos = 0; pos < positions; ++pos) {
1493 carg.weight_set[pos].size = b->size;
1494 carg.weight_set[pos].weights = (__u32*)calloc(4, b->size);
1495 for (unsigned i = 0; i < b->size; ++i) {
1496 carg.weight_set[pos].weights[i] = sb->item_weights[i];
1497 }
1498 }
1499 } else {
1500 carg.weight_set = NULL;
28e407b8 1501 carg.weight_set_positions = 0;
c07f9fc5
FG
1502 }
1503 }
11fdf7f2 1504 return true;
c07f9fc5
FG
1505 }
1506
1507 void rm_choose_args(int64_t id) {
1508 auto p = choose_args.find(id);
1509 if (p != choose_args.end()) {
1510 destroy_choose_args(p->second);
1511 choose_args.erase(p);
1512 }
1513 }
1514
7c673cae
FG
1515 void choose_args_clear() {
1516 for (auto w : choose_args)
1517 destroy_choose_args(w.second);
1518 choose_args.clear();
1519 }
1520
28e407b8
AA
1521 // remove choose_args for buckets that no longer exist, create them for new buckets
1522 void update_choose_args(CephContext *cct);
1523
c07f9fc5
FG
1524 // adjust choose_args_map weight, preserving the hierarchical summation
1525 // property. used by callers optimizing layouts by tweaking weights.
1526 int _choose_args_adjust_item_weight_in_bucket(
1527 CephContext *cct,
1528 crush_choose_arg_map cmap,
1529 int bucketid,
1530 int id,
1531 const vector<int>& weight,
1532 ostream *ss);
1533 int choose_args_adjust_item_weight(
1534 CephContext *cct,
1535 crush_choose_arg_map cmap,
1536 int id, const vector<int>& weight,
1537 ostream *ss);
1538 int choose_args_adjust_item_weightf(
1539 CephContext *cct,
1540 crush_choose_arg_map cmap,
1541 int id, const vector<double>& weightf,
1542 ostream *ss) {
1543 vector<int> weight(weightf.size());
1544 for (unsigned i = 0; i < weightf.size(); ++i) {
11fdf7f2 1545 weight[i] = (int)(weightf[i] * (double)0x10000);
c07f9fc5
FG
1546 }
1547 return choose_args_adjust_item_weight(cct, cmap, id, weight, ss);
1548 }
1549
1550 int get_choose_args_positions(crush_choose_arg_map cmap) {
1551 // infer positions from other buckets
1552 for (unsigned j = 0; j < cmap.size; ++j) {
28e407b8
AA
1553 if (cmap.args[j].weight_set_positions) {
1554 return cmap.args[j].weight_set_positions;
c07f9fc5
FG
1555 }
1556 }
1557 return 1;
1558 }
1559
7c673cae
FG
1560 template<typename WeightVector>
1561 void do_rule(int rule, int x, vector<int>& out, int maxout,
1562 const WeightVector& weight,
1563 uint64_t choose_args_index) const {
1564 int rawout[maxout];
1565 char work[crush_work_size(crush, maxout)];
1566 crush_init_workspace(crush, work);
c07f9fc5
FG
1567 crush_choose_arg_map arg_map = choose_args_get_with_fallback(
1568 choose_args_index);
92f5a8d4
TL
1569 int numrep = crush_do_rule(crush, rule, x, rawout, maxout,
1570 std::data(weight), std::size(weight),
1571 work, arg_map.args);
7c673cae
FG
1572 if (numrep < 0)
1573 numrep = 0;
1574 out.resize(numrep);
1575 for (int i=0; i<numrep; i++)
1576 out[i] = rawout[i];
1577 }
1578
1579 int _choose_type_stack(
1580 CephContext *cct,
1581 const vector<pair<int,int>>& stack,
1582 const set<int>& overfull,
1583 const vector<int>& underfull,
92f5a8d4 1584 const vector<int>& more_underfull,
7c673cae
FG
1585 const vector<int>& orig,
1586 vector<int>::const_iterator& i,
1587 set<int>& used,
a8e16298 1588 vector<int> *pw,
92f5a8d4
TL
1589 int root_bucket,
1590 int rule) const;
7c673cae
FG
1591
1592 int try_remap_rule(
1593 CephContext *cct,
1594 int rule,
1595 int maxout,
1596 const set<int>& overfull,
1597 const vector<int>& underfull,
92f5a8d4 1598 const vector<int>& more_underfull,
7c673cae
FG
1599 const vector<int>& orig,
1600 vector<int> *out) const;
1601
1602 bool check_crush_rule(int ruleset, int type, int size, ostream& ss) {
11fdf7f2 1603 ceph_assert(crush);
7c673cae
FG
1604
1605 __u32 i;
1606 for (i = 0; i < crush->max_rules; i++) {
1607 if (crush->rules[i] &&
1608 crush->rules[i]->mask.ruleset == ruleset &&
1609 crush->rules[i]->mask.type == type) {
1610
1611 if (crush->rules[i]->mask.min_size <= size &&
1612 crush->rules[i]->mask.max_size >= size) {
1613 return true;
1614 } else if (size < crush->rules[i]->mask.min_size) {
1615 ss << "pool size is smaller than the crush rule min size";
1616 return false;
1617 } else {
1618 ss << "pool size is bigger than the crush rule max size";
1619 return false;
1620 }
1621 }
1622 }
1623
1624 return false;
1625 }
1626
1627 void encode(bufferlist &bl, uint64_t features) const;
11fdf7f2
TL
1628 void decode(bufferlist::const_iterator &blp);
1629 void decode_crush_bucket(crush_bucket** bptr, bufferlist::const_iterator &blp);
7c673cae
FG
1630 void dump(Formatter *f) const;
1631 void dump_rules(Formatter *f) const;
1632 void dump_rule(int ruleset, Formatter *f) const;
1633 void dump_tunables(Formatter *f) const;
1634 void dump_choose_args(Formatter *f) const;
1635 void list_rules(Formatter *f) const;
c07f9fc5
FG
1636 void list_rules(ostream *ss) const;
1637 void dump_tree(ostream *out,
1638 Formatter *f,
1639 const CrushTreeDumper::name_map_t& ws,
1640 bool show_shadow = false) const;
1641 void dump_tree(ostream *out, Formatter *f) {
1642 dump_tree(out, f, CrushTreeDumper::name_map_t());
1643 }
1644 void dump_tree(Formatter *f,
1645 const CrushTreeDumper::name_map_t& ws) const;
7c673cae
FG
1646 static void generate_test_instances(list<CrushWrapper*>& o);
1647
7c673cae
FG
1648 int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);
1649
1650 static bool is_valid_crush_name(const string& s);
1651 static bool is_valid_crush_loc(CephContext *cct,
1652 const map<string,string>& loc);
1653};
1654WRITE_CLASS_ENCODER_FEATURES(CrushWrapper)
1655
1656#endif