]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #ifndef CEPH_CRUSH_WRAPPER_H | |
5 | #define CEPH_CRUSH_WRAPPER_H | |
6 | ||
7 | #include <stdlib.h> | |
8 | #include <map> | |
9 | #include <set> | |
10 | #include <string> | |
11 | ||
12 | #include <iosfwd> | |
13 | ||
14 | #include "include/types.h" | |
15 | ||
16 | extern "C" { | |
17 | #include "crush.h" | |
18 | #include "hash.h" | |
19 | #include "mapper.h" | |
20 | #include "builder.h" | |
21 | } | |
22 | ||
31f18b77 | 23 | #include "include/assert.h" |
7c673cae FG |
24 | #include "include/err.h" |
25 | #include "include/encoding.h" | |
c07f9fc5 | 26 | #include "include/mempool.h" |
7c673cae FG |
27 | |
28 | #include "common/Mutex.h" | |
29 | ||
7c673cae FG |
30 | #define BUG_ON(x) assert(!(x)) |
31 | ||
32 | namespace ceph { | |
33 | class Formatter; | |
34 | } | |
35 | ||
c07f9fc5 FG |
36 | namespace CrushTreeDumper { |
37 | typedef mempool::osdmap::map<int64_t,string> name_map_t; | |
38 | } | |
39 | ||
7c673cae FG |
40 | WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's |
41 | ||
42 | inline static void encode(const crush_rule_step &s, bufferlist &bl) | |
43 | { | |
44 | ::encode(s.op, bl); | |
45 | ::encode(s.arg1, bl); | |
46 | ::encode(s.arg2, bl); | |
47 | } | |
48 | inline static void decode(crush_rule_step &s, bufferlist::iterator &p) | |
49 | { | |
50 | ::decode(s.op, p); | |
51 | ::decode(s.arg1, p); | |
52 | ::decode(s.arg2, p); | |
53 | } | |
54 | ||
55 | using namespace std; | |
56 | class CrushWrapper { | |
57 | public: | |
c07f9fc5 FG |
58 | // magic value used by OSDMap for a "default" fallback choose_args, used if |
59 | // the choose_arg_map passed to do_rule does not exist. if this also | |
60 | // doesn't exist, fall back to canonical weights. | |
61 | enum { | |
62 | DEFAULT_CHOOSE_ARGS = -1 | |
63 | }; | |
64 | ||
7c673cae FG |
65 | std::map<int32_t, string> type_map; /* bucket/device type names */ |
66 | std::map<int32_t, string> name_map; /* bucket/device names */ | |
67 | std::map<int32_t, string> rule_name_map; | |
d2e6a577 | 68 | |
7c673cae FG |
69 | std::map<int32_t, int32_t> class_map; /* item id -> class id */ |
70 | std::map<int32_t, string> class_name; /* class id -> class name */ | |
71 | std::map<string, int32_t> class_rname; /* class name -> class id */ | |
72 | std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */ | |
c07f9fc5 | 73 | std::map<int64_t, crush_choose_arg_map> choose_args; |
7c673cae FG |
74 | |
75 | private: | |
76 | struct crush_map *crush; | |
31f18b77 FG |
77 | |
78 | bool have_uniform_rules = false; | |
79 | ||
7c673cae FG |
80 | /* reverse maps */ |
81 | mutable bool have_rmaps; | |
82 | mutable std::map<string, int> type_rmap, name_rmap, rule_name_rmap; | |
83 | void build_rmaps() const { | |
84 | if (have_rmaps) return; | |
85 | build_rmap(type_map, type_rmap); | |
86 | build_rmap(name_map, name_rmap); | |
87 | build_rmap(rule_name_map, rule_name_rmap); | |
88 | have_rmaps = true; | |
89 | } | |
90 | void build_rmap(const map<int, string> &f, std::map<string, int> &r) const { | |
91 | r.clear(); | |
92 | for (std::map<int, string>::const_iterator p = f.begin(); p != f.end(); ++p) | |
93 | r[p->second] = p->first; | |
94 | } | |
95 | ||
96 | public: | |
97 | CrushWrapper(const CrushWrapper& other); | |
98 | const CrushWrapper& operator=(const CrushWrapper& other); | |
99 | ||
100 | CrushWrapper() : crush(0), have_rmaps(false) { | |
101 | create(); | |
102 | } | |
103 | ~CrushWrapper() { | |
104 | if (crush) | |
105 | crush_destroy(crush); | |
106 | choose_args_clear(); | |
107 | } | |
108 | ||
109 | crush_map *get_crush_map() { return crush; } | |
110 | ||
111 | /* building */ | |
112 | void create() { | |
113 | if (crush) | |
114 | crush_destroy(crush); | |
115 | crush = crush_create(); | |
116 | choose_args_clear(); | |
117 | assert(crush); | |
118 | have_rmaps = false; | |
119 | ||
120 | set_tunables_default(); | |
121 | } | |
122 | ||
31f18b77 FG |
123 | /// true if any rule has a ruleset != the rule id |
124 | bool has_legacy_rulesets() const; | |
125 | ||
126 | /// fix rules whose ruleid != ruleset | |
127 | int renumber_rules_by_ruleset(); | |
128 | ||
129 | /// true if any ruleset has more than 1 rule | |
130 | bool has_multirule_rulesets() const; | |
131 | ||
c07f9fc5 FG |
132 | /// true if any buckets that aren't straw2 |
133 | bool has_non_straw2_buckets() const; | |
134 | ||
7c673cae FG |
135 | // tunables |
136 | void set_tunables_argonaut() { | |
137 | crush->choose_local_tries = 2; | |
138 | crush->choose_local_fallback_tries = 5; | |
139 | crush->choose_total_tries = 19; | |
140 | crush->chooseleaf_descend_once = 0; | |
141 | crush->chooseleaf_vary_r = 0; | |
142 | crush->chooseleaf_stable = 0; | |
143 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
144 | } | |
145 | void set_tunables_bobtail() { | |
146 | crush->choose_local_tries = 0; | |
147 | crush->choose_local_fallback_tries = 0; | |
148 | crush->choose_total_tries = 50; | |
149 | crush->chooseleaf_descend_once = 1; | |
150 | crush->chooseleaf_vary_r = 0; | |
151 | crush->chooseleaf_stable = 0; | |
152 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
153 | } | |
154 | void set_tunables_firefly() { | |
155 | crush->choose_local_tries = 0; | |
156 | crush->choose_local_fallback_tries = 0; | |
157 | crush->choose_total_tries = 50; | |
158 | crush->chooseleaf_descend_once = 1; | |
159 | crush->chooseleaf_vary_r = 1; | |
160 | crush->chooseleaf_stable = 0; | |
161 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
162 | } | |
163 | void set_tunables_hammer() { | |
164 | crush->choose_local_tries = 0; | |
165 | crush->choose_local_fallback_tries = 0; | |
166 | crush->choose_total_tries = 50; | |
167 | crush->chooseleaf_descend_once = 1; | |
168 | crush->chooseleaf_vary_r = 1; | |
169 | crush->chooseleaf_stable = 0; | |
170 | crush->allowed_bucket_algs = | |
171 | (1 << CRUSH_BUCKET_UNIFORM) | | |
172 | (1 << CRUSH_BUCKET_LIST) | | |
173 | (1 << CRUSH_BUCKET_STRAW) | | |
174 | (1 << CRUSH_BUCKET_STRAW2); | |
175 | } | |
176 | void set_tunables_jewel() { | |
177 | crush->choose_local_tries = 0; | |
178 | crush->choose_local_fallback_tries = 0; | |
179 | crush->choose_total_tries = 50; | |
180 | crush->chooseleaf_descend_once = 1; | |
181 | crush->chooseleaf_vary_r = 1; | |
182 | crush->chooseleaf_stable = 1; | |
183 | crush->allowed_bucket_algs = | |
184 | (1 << CRUSH_BUCKET_UNIFORM) | | |
185 | (1 << CRUSH_BUCKET_LIST) | | |
186 | (1 << CRUSH_BUCKET_STRAW) | | |
187 | (1 << CRUSH_BUCKET_STRAW2); | |
188 | } | |
189 | ||
190 | void set_tunables_legacy() { | |
191 | set_tunables_argonaut(); | |
192 | crush->straw_calc_version = 0; | |
193 | } | |
194 | void set_tunables_optimal() { | |
195 | set_tunables_jewel(); | |
196 | crush->straw_calc_version = 1; | |
197 | } | |
198 | void set_tunables_default() { | |
31f18b77 | 199 | set_tunables_jewel(); |
7c673cae FG |
200 | crush->straw_calc_version = 1; |
201 | } | |
202 | ||
203 | int get_choose_local_tries() const { | |
204 | return crush->choose_local_tries; | |
205 | } | |
206 | void set_choose_local_tries(int n) { | |
207 | crush->choose_local_tries = n; | |
208 | } | |
209 | ||
210 | int get_choose_local_fallback_tries() const { | |
211 | return crush->choose_local_fallback_tries; | |
212 | } | |
213 | void set_choose_local_fallback_tries(int n) { | |
214 | crush->choose_local_fallback_tries = n; | |
215 | } | |
216 | ||
217 | int get_choose_total_tries() const { | |
218 | return crush->choose_total_tries; | |
219 | } | |
220 | void set_choose_total_tries(int n) { | |
221 | crush->choose_total_tries = n; | |
222 | } | |
223 | ||
224 | int get_chooseleaf_descend_once() const { | |
225 | return crush->chooseleaf_descend_once; | |
226 | } | |
227 | void set_chooseleaf_descend_once(int n) { | |
228 | crush->chooseleaf_descend_once = !!n; | |
229 | } | |
230 | ||
231 | int get_chooseleaf_vary_r() const { | |
232 | return crush->chooseleaf_vary_r; | |
233 | } | |
234 | void set_chooseleaf_vary_r(int n) { | |
235 | crush->chooseleaf_vary_r = n; | |
236 | } | |
237 | ||
238 | int get_chooseleaf_stable() const { | |
239 | return crush->chooseleaf_stable; | |
240 | } | |
241 | void set_chooseleaf_stable(int n) { | |
242 | crush->chooseleaf_stable = n; | |
243 | } | |
244 | ||
245 | int get_straw_calc_version() const { | |
246 | return crush->straw_calc_version; | |
247 | } | |
248 | void set_straw_calc_version(int n) { | |
249 | crush->straw_calc_version = n; | |
250 | } | |
251 | ||
252 | unsigned get_allowed_bucket_algs() const { | |
253 | return crush->allowed_bucket_algs; | |
254 | } | |
255 | void set_allowed_bucket_algs(unsigned n) { | |
256 | crush->allowed_bucket_algs = n; | |
257 | } | |
258 | ||
259 | bool has_argonaut_tunables() const { | |
260 | return | |
261 | crush->choose_local_tries == 2 && | |
262 | crush->choose_local_fallback_tries == 5 && | |
263 | crush->choose_total_tries == 19 && | |
264 | crush->chooseleaf_descend_once == 0 && | |
265 | crush->chooseleaf_vary_r == 0 && | |
266 | crush->chooseleaf_stable == 0 && | |
267 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
268 | } | |
269 | bool has_bobtail_tunables() const { | |
270 | return | |
271 | crush->choose_local_tries == 0 && | |
272 | crush->choose_local_fallback_tries == 0 && | |
273 | crush->choose_total_tries == 50 && | |
274 | crush->chooseleaf_descend_once == 1 && | |
275 | crush->chooseleaf_vary_r == 0 && | |
276 | crush->chooseleaf_stable == 0 && | |
277 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
278 | } | |
279 | bool has_firefly_tunables() const { | |
280 | return | |
281 | crush->choose_local_tries == 0 && | |
282 | crush->choose_local_fallback_tries == 0 && | |
283 | crush->choose_total_tries == 50 && | |
284 | crush->chooseleaf_descend_once == 1 && | |
285 | crush->chooseleaf_vary_r == 1 && | |
286 | crush->chooseleaf_stable == 0 && | |
287 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
288 | } | |
289 | bool has_hammer_tunables() const { | |
290 | return | |
291 | crush->choose_local_tries == 0 && | |
292 | crush->choose_local_fallback_tries == 0 && | |
293 | crush->choose_total_tries == 50 && | |
294 | crush->chooseleaf_descend_once == 1 && | |
295 | crush->chooseleaf_vary_r == 1 && | |
296 | crush->chooseleaf_stable == 0 && | |
297 | crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) | | |
298 | (1 << CRUSH_BUCKET_LIST) | | |
299 | (1 << CRUSH_BUCKET_STRAW) | | |
300 | (1 << CRUSH_BUCKET_STRAW2)); | |
301 | } | |
302 | bool has_jewel_tunables() const { | |
303 | return | |
304 | crush->choose_local_tries == 0 && | |
305 | crush->choose_local_fallback_tries == 0 && | |
306 | crush->choose_total_tries == 50 && | |
307 | crush->chooseleaf_descend_once == 1 && | |
308 | crush->chooseleaf_vary_r == 1 && | |
309 | crush->chooseleaf_stable == 1 && | |
310 | crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) | | |
311 | (1 << CRUSH_BUCKET_LIST) | | |
312 | (1 << CRUSH_BUCKET_STRAW) | | |
313 | (1 << CRUSH_BUCKET_STRAW2)); | |
314 | } | |
315 | ||
316 | bool has_optimal_tunables() const { | |
317 | return has_jewel_tunables(); | |
318 | } | |
319 | bool has_legacy_tunables() const { | |
320 | return has_argonaut_tunables(); | |
321 | } | |
322 | ||
323 | bool has_nondefault_tunables() const { | |
324 | return | |
325 | (crush->choose_local_tries != 2 || | |
326 | crush->choose_local_fallback_tries != 5 || | |
327 | crush->choose_total_tries != 19); | |
328 | } | |
329 | bool has_nondefault_tunables2() const { | |
330 | return | |
331 | crush->chooseleaf_descend_once != 0; | |
332 | } | |
333 | bool has_nondefault_tunables3() const { | |
334 | return | |
335 | crush->chooseleaf_vary_r != 0; | |
336 | } | |
337 | bool has_nondefault_tunables5() const { | |
338 | return | |
339 | crush->chooseleaf_stable != 0; | |
340 | } | |
341 | ||
342 | bool has_v2_rules() const; | |
343 | bool has_v3_rules() const; | |
344 | bool has_v4_buckets() const; | |
345 | bool has_v5_rules() const; | |
31f18b77 FG |
346 | bool has_choose_args() const; // any choose_args |
347 | bool has_incompat_choose_args() const; // choose_args that can't be made compat | |
7c673cae FG |
348 | |
349 | bool is_v2_rule(unsigned ruleid) const; | |
350 | bool is_v3_rule(unsigned ruleid) const; | |
351 | bool is_v5_rule(unsigned ruleid) const; | |
352 | ||
353 | string get_min_required_version() const { | |
354 | if (has_v5_rules() || has_nondefault_tunables5()) | |
355 | return "jewel"; | |
356 | else if (has_v4_buckets()) | |
357 | return "hammer"; | |
358 | else if (has_nondefault_tunables3()) | |
359 | return "firefly"; | |
360 | else if (has_nondefault_tunables2() || has_nondefault_tunables()) | |
361 | return "bobtail"; | |
362 | else | |
363 | return "argonaut"; | |
364 | } | |
365 | ||
366 | // default bucket types | |
367 | unsigned get_default_bucket_alg() const { | |
368 | // in order of preference | |
369 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2)) | |
370 | return CRUSH_BUCKET_STRAW2; | |
371 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW)) | |
372 | return CRUSH_BUCKET_STRAW; | |
373 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE)) | |
374 | return CRUSH_BUCKET_TREE; | |
375 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST)) | |
376 | return CRUSH_BUCKET_LIST; | |
377 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM)) | |
378 | return CRUSH_BUCKET_UNIFORM; | |
379 | return 0; | |
380 | } | |
381 | ||
382 | // bucket types | |
383 | int get_num_type_names() const { | |
384 | return type_map.size(); | |
385 | } | |
31f18b77 FG |
386 | int get_max_type_id() const { |
387 | if (type_map.empty()) | |
388 | return 0; | |
389 | return type_map.rbegin()->first; | |
390 | } | |
7c673cae FG |
391 | int get_type_id(const string& name) const { |
392 | build_rmaps(); | |
393 | if (type_rmap.count(name)) | |
394 | return type_rmap[name]; | |
395 | return -1; | |
396 | } | |
397 | const char *get_type_name(int t) const { | |
398 | std::map<int,string>::const_iterator p = type_map.find(t); | |
399 | if (p != type_map.end()) | |
400 | return p->second.c_str(); | |
401 | return 0; | |
402 | } | |
403 | void set_type_name(int i, const string& name) { | |
404 | type_map[i] = name; | |
405 | if (have_rmaps) | |
406 | type_rmap[name] = i; | |
407 | } | |
408 | ||
409 | // item/bucket names | |
410 | bool name_exists(const string& name) const { | |
411 | build_rmaps(); | |
412 | return name_rmap.count(name); | |
413 | } | |
414 | bool item_exists(int i) const { | |
415 | return name_map.count(i); | |
416 | } | |
417 | int get_item_id(const string& name) const { | |
418 | build_rmaps(); | |
419 | if (name_rmap.count(name)) | |
420 | return name_rmap[name]; | |
421 | return 0; /* hrm */ | |
422 | } | |
423 | const char *get_item_name(int t) const { | |
424 | std::map<int,string>::const_iterator p = name_map.find(t); | |
425 | if (p != name_map.end()) | |
426 | return p->second.c_str(); | |
427 | return 0; | |
428 | } | |
429 | int set_item_name(int i, const string& name) { | |
430 | if (!is_valid_crush_name(name)) | |
431 | return -EINVAL; | |
432 | name_map[i] = name; | |
433 | if (have_rmaps) | |
434 | name_rmap[name] = i; | |
435 | return 0; | |
436 | } | |
31f18b77 FG |
437 | void swap_names(int a, int b) { |
438 | string an = name_map[a]; | |
439 | string bn = name_map[b]; | |
440 | name_map[a] = bn; | |
441 | name_map[b] = an; | |
442 | if (have_rmaps) { | |
443 | name_rmap[an] = b; | |
444 | name_rmap[bn] = a; | |
445 | } | |
446 | } | |
7c673cae FG |
447 | int split_id_class(int i, int *idout, int *classout) const; |
448 | ||
449 | bool class_exists(const string& name) const { | |
450 | return class_rname.count(name); | |
451 | } | |
452 | const char *get_class_name(int i) const { | |
224ce89b | 453 | auto p = class_name.find(i); |
7c673cae FG |
454 | if (p != class_name.end()) |
455 | return p->second.c_str(); | |
456 | return 0; | |
457 | } | |
458 | int get_class_id(const string& name) const { | |
224ce89b | 459 | auto p = class_rname.find(name); |
7c673cae FG |
460 | if (p != class_rname.end()) |
461 | return p->second; | |
462 | else | |
463 | return -EINVAL; | |
464 | } | |
465 | int remove_class_name(const string& name) { | |
224ce89b | 466 | auto p = class_rname.find(name); |
7c673cae FG |
467 | if (p == class_rname.end()) |
468 | return -ENOENT; | |
469 | int class_id = p->second; | |
224ce89b | 470 | auto q = class_name.find(class_id); |
7c673cae FG |
471 | if (q == class_name.end()) |
472 | return -ENOENT; | |
473 | class_rname.erase(name); | |
474 | class_name.erase(class_id); | |
475 | return 0; | |
476 | } | |
224ce89b | 477 | |
224ce89b WB |
478 | int32_t _alloc_class_id() const; |
479 | ||
7c673cae FG |
480 | int get_or_create_class_id(const string& name) { |
481 | int c = get_class_id(name); | |
482 | if (c < 0) { | |
224ce89b | 483 | int i = _alloc_class_id(); |
7c673cae FG |
484 | class_name[i] = name; |
485 | class_rname[name] = i; | |
486 | return i; | |
487 | } else { | |
488 | return c; | |
489 | } | |
490 | } | |
491 | ||
492 | const char *get_item_class(int t) const { | |
493 | std::map<int,int>::const_iterator p = class_map.find(t); | |
494 | if (p == class_map.end()) | |
495 | return 0; | |
496 | return get_class_name(p->second); | |
497 | } | |
498 | int set_item_class(int i, const string& name) { | |
499 | if (!is_valid_crush_name(name)) | |
500 | return -EINVAL; | |
501 | class_map[i] = get_or_create_class_id(name); | |
502 | return 0; | |
503 | } | |
504 | int set_item_class(int i, int c) { | |
505 | class_map[i] = c; | |
506 | return c; | |
507 | } | |
224ce89b WB |
508 | void get_devices_by_class(const string &name, set<int> *devices) const { |
509 | assert(devices); | |
510 | devices->clear(); | |
511 | if (!class_exists(name)) { | |
512 | return; | |
513 | } | |
514 | auto cid = get_class_id(name); | |
515 | for (auto& p : class_map) { | |
516 | if (p.first >= 0 && p.second == cid) { | |
517 | devices->insert(p.first); | |
518 | } | |
519 | } | |
520 | } | |
521 | void class_remove_item(int i) { | |
522 | auto it = class_map.find(i); | |
523 | if (it == class_map.end()) { | |
524 | return; | |
525 | } | |
526 | class_map.erase(it); | |
527 | } | |
7c673cae FG |
528 | int can_rename_item(const string& srcname, |
529 | const string& dstname, | |
530 | ostream *ss) const; | |
531 | int rename_item(const string& srcname, | |
532 | const string& dstname, | |
533 | ostream *ss); | |
534 | int can_rename_bucket(const string& srcname, | |
535 | const string& dstname, | |
536 | ostream *ss) const; | |
537 | int rename_bucket(const string& srcname, | |
538 | const string& dstname, | |
539 | ostream *ss); | |
540 | ||
541 | // rule names | |
542 | bool rule_exists(string name) const { | |
543 | build_rmaps(); | |
544 | return rule_name_rmap.count(name); | |
545 | } | |
546 | int get_rule_id(string name) const { | |
547 | build_rmaps(); | |
548 | if (rule_name_rmap.count(name)) | |
549 | return rule_name_rmap[name]; | |
550 | return -ENOENT; | |
551 | } | |
552 | const char *get_rule_name(int t) const { | |
553 | std::map<int,string>::const_iterator p = rule_name_map.find(t); | |
554 | if (p != rule_name_map.end()) | |
555 | return p->second.c_str(); | |
556 | return 0; | |
557 | } | |
558 | void set_rule_name(int i, const string& name) { | |
559 | rule_name_map[i] = name; | |
560 | if (have_rmaps) | |
561 | rule_name_rmap[name] = i; | |
562 | } | |
c07f9fc5 FG |
563 | bool is_shadow_item(int id) const { |
564 | const char *name = get_item_name(id); | |
565 | return name && !is_valid_crush_name(name); | |
566 | } | |
7c673cae FG |
567 | |
568 | ||
569 | /** | |
570 | * find tree nodes referenced by rules by a 'take' command | |
571 | * | |
572 | * Note that these may not be parentless roots. | |
573 | */ | |
574 | void find_takes(set<int>& roots) const; | |
575 | ||
576 | /** | |
577 | * find tree roots | |
578 | * | |
579 | * These are parentless nodes in the map. | |
580 | */ | |
581 | void find_roots(set<int>& roots) const; | |
582 | ||
c07f9fc5 FG |
583 | |
584 | /** | |
585 | * find tree roots that contain shadow (device class) items only | |
586 | */ | |
587 | void find_shadow_roots(set<int>& roots) const { | |
588 | set<int> all; | |
589 | find_roots(all); | |
590 | for (auto& p: all) { | |
591 | if (is_shadow_item(p)) { | |
592 | roots.insert(p); | |
593 | } | |
594 | } | |
595 | } | |
596 | ||
224ce89b WB |
597 | /** |
598 | * find tree roots that are not shadow (device class) items | |
599 | * | |
600 | * These are parentless nodes in the map that are not shadow | |
601 | * items for device classes. | |
602 | */ | |
c07f9fc5 FG |
603 | void find_nonshadow_roots(set<int>& roots) const { |
604 | set<int> all; | |
605 | find_roots(all); | |
606 | for (auto& p: all) { | |
607 | if (!is_shadow_item(p)) { | |
608 | roots.insert(p); | |
609 | } | |
610 | } | |
611 | } | |
224ce89b | 612 | |
7c673cae FG |
613 | /** |
614 | * see if an item is contained within a subtree | |
615 | * | |
616 | * @param root haystack | |
617 | * @param item needle | |
618 | * @return true if the item is located beneath the given node | |
619 | */ | |
620 | bool subtree_contains(int root, int item) const; | |
621 | ||
622 | private: | |
623 | /** | |
624 | * search for an item in any bucket | |
625 | * | |
626 | * @param i item | |
627 | * @return true if present | |
628 | */ | |
629 | bool _search_item_exists(int i) const; | |
630 | public: | |
631 | ||
632 | /** | |
633 | * see if item is located where we think it is | |
634 | * | |
635 | * This verifies that the given item is located at a particular | |
636 | * location in the hierarchy. However, that check is imprecise; we | |
637 | * are actually verifying that the most specific location key/value | |
638 | * is correct. For example, if loc specifies that rack=foo and | |
639 | * host=bar, it will verify that host=bar is correct; any placement | |
640 | * above that level in the hierarchy is ignored. This matches the | |
641 | * semantics for insert_item(). | |
642 | * | |
643 | * @param cct cct | |
644 | * @param item item id | |
645 | * @param loc location to check (map of type to bucket names) | |
646 | * @param weight optional pointer to weight of item at that location | |
647 | * @return true if item is at specified location | |
648 | */ | |
649 | bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, int *iweight); | |
650 | bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, float *weight) { | |
651 | int iweight; | |
652 | bool ret = check_item_loc(cct, item, loc, &iweight); | |
653 | if (weight) | |
654 | *weight = (float)iweight / (float)0x10000; | |
655 | return ret; | |
656 | } | |
657 | ||
658 | ||
659 | /** | |
660 | * returns the (type, name) of the parent bucket of id | |
661 | * | |
662 | * FIXME: ambiguous for items that occur multiple times in the map | |
663 | */ | |
664 | pair<string,string> get_immediate_parent(int id, int *ret = NULL); | |
c07f9fc5 | 665 | |
7c673cae FG |
666 | int get_immediate_parent_id(int id, int *parent) const; |
667 | ||
31f18b77 FG |
668 | /** |
669 | * return ancestor of the given type, or 0 if none | |
670 | * (parent is always a bucket and thus <0) | |
671 | */ | |
672 | int get_parent_of_type(int id, int type) const; | |
673 | ||
7c673cae FG |
674 | /** |
675 | * get the fully qualified location of a device by successively finding | |
676 | * parents beginning at ID and ending at highest type number specified in | |
677 | * the CRUSH map which assumes that if device foo is under device bar, the | |
678 | * type_id of foo < bar where type_id is the integer specified in the CRUSH map | |
679 | * | |
680 | * returns the location in the form of (type=foo) where type is a type of bucket | |
681 | * specified in the CRUSH map and foo is a name specified in the CRUSH map | |
682 | */ | |
683 | map<string, string> get_full_location(int id); | |
684 | ||
685 | /* | |
686 | * identical to get_full_location(int id) although it returns the type/name | |
687 | * pairs in the order they occur in the hierarchy. | |
688 | * | |
689 | * returns -ENOENT if id is not found. | |
690 | */ | |
691 | int get_full_location_ordered(int id, vector<pair<string, string> >& path); | |
692 | ||
31f18b77 FG |
693 | /* |
694 | * identical to get_full_location_ordered(int id, vector<pair<string, string> >& path), | |
695 | * although it returns a concatenated string with the type/name pairs in descending | |
696 | * hierarchical order with format key1=val1,key2=val2. | |
697 | * | |
698 | * returns the location in descending hierarchy as a string. | |
699 | */ | |
700 | string get_full_location_ordered_string(int id); | |
701 | ||
7c673cae FG |
702 | /** |
703 | * returns (type_id, type) of all parent buckets between id and | |
704 | * default, can be used to check for anomolous CRUSH maps | |
705 | */ | |
706 | map<int, string> get_parent_hierarchy(int id); | |
707 | ||
708 | /** | |
709 | * enumerate immediate children of given node | |
710 | * | |
711 | * @param id parent bucket or device id | |
712 | * @return number of items, or error | |
713 | */ | |
714 | int get_children(int id, list<int> *children); | |
715 | ||
31f18b77 FG |
716 | /** |
717 | * enumerate leaves(devices) of given node | |
718 | * | |
719 | * @param name parent bucket name | |
720 | * @return 0 on success or a negative errno on error. | |
721 | */ | |
722 | int get_leaves(const string &name, set<int> *leaves); | |
723 | int _get_leaves(int id, list<int> *leaves); // worker | |
724 | ||
7c673cae FG |
725 | /** |
726 | * insert an item into the map at a specific position | |
727 | * | |
728 | * Add an item as a specific location of the hierarchy. | |
729 | * Specifically, we look for the most specific location constraint | |
730 | * for which a bucket already exists, and then create intervening | |
731 | * buckets beneath that in order to place the item. | |
732 | * | |
733 | * Note that any location specifiers *above* the most specific match | |
734 | * are ignored. For example, if we specify that osd.12 goes in | |
735 | * host=foo, rack=bar, and row=baz, and rack=bar is the most | |
736 | * specific match, we will create host=foo beneath that point and | |
737 | * put osd.12 inside it. However, we will not verify that rack=bar | |
738 | * is beneath row=baz or move it. | |
739 | * | |
740 | * In short, we will build out a hierarchy, and move leaves around, | |
741 | * but not adjust the hierarchy's internal structure. Yet. | |
742 | * | |
743 | * If the item is already present in the map, we will return EEXIST. | |
744 | * If the location key/value pairs are nonsensical | |
745 | * (rack=nameofdevice), or location specifies that do not attach us | |
746 | * to any existing part of the hierarchy, we will return EINVAL. | |
747 | * | |
748 | * @param cct cct | |
749 | * @param id item id | |
750 | * @param weight item weight | |
751 | * @param name item name | |
752 | * @param loc location (map of type to bucket names) | |
753 | * @return 0 for success, negative on error | |
754 | */ | |
755 | int insert_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc); | |
756 | ||
757 | /** | |
758 | * move a bucket in the hierarchy to the given location | |
759 | * | |
760 | * This has the same location and ancestor creation behavior as | |
761 | * insert_item(), but will relocate the specified existing bucket. | |
762 | * | |
763 | * @param cct cct | |
764 | * @param id bucket id | |
765 | * @param loc location (map of type to bucket names) | |
766 | * @return 0 for success, negative on error | |
767 | */ | |
768 | int move_bucket(CephContext *cct, int id, const map<string,string>& loc); | |
769 | ||
31f18b77 FG |
770 | /** |
771 | * swap bucket contents of two buckets without touching bucket ids | |
772 | * | |
773 | * @param cct cct | |
774 | * @param src bucket a | |
775 | * @param dst bucket b | |
776 | * @return 0 for success, negative on error | |
777 | */ | |
778 | int swap_bucket(CephContext *cct, int src, int dst); | |
779 | ||
7c673cae FG |
780 | /** |
781 | * add a link to an existing bucket in the hierarchy to the new location | |
782 | * | |
783 | * This has the same location and ancestor creation behavior as | |
784 | * insert_item(), but will add a new link to the specified existing | |
785 | * bucket. | |
786 | * | |
787 | * @param cct cct | |
788 | * @param id bucket id | |
789 | * @param loc location (map of type to bucket names) | |
790 | * @return 0 for success, negative on error | |
791 | */ | |
792 | int link_bucket(CephContext *cct, int id, const map<string,string>& loc); | |
793 | ||
794 | /** | |
795 | * add or update an item's position in the map | |
796 | * | |
797 | * This is analogous to insert_item, except we will move an item if | |
798 | * it is already present. | |
799 | * | |
800 | * @param cct cct | |
801 | * @param id item id | |
802 | * @param weight item weight | |
803 | * @param name item name | |
804 | * @param loc location (map of type to bucket names) | |
805 | * @return 0 for no change, 1 for successful change, negative on error | |
806 | */ | |
807 | int update_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc); | |
808 | ||
809 | /** | |
810 | * create or move an item, but do not adjust its weight if it already exists | |
811 | * | |
812 | * @param cct cct | |
813 | * @param item item id | |
814 | * @param weight initial item weight (if we need to create it) | |
815 | * @param name item name | |
816 | * @param loc location (map of type to bucket names) | |
817 | * @return 0 for no change, 1 for successful change, negative on error | |
818 | */ | |
819 | int create_or_move_item(CephContext *cct, int item, float weight, string name, | |
820 | const map<string,string>& loc); | |
821 | ||
822 | /** | |
823 | * remove all instances of an item from the map | |
824 | * | |
825 | * @param cct cct | |
826 | * @param id item id to remove | |
827 | * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty) | |
828 | * @return 0 on success, negative on error | |
829 | */ | |
830 | int remove_item(CephContext *cct, int id, bool unlink_only); | |
831 | ||
832 | /** | |
833 | * recursively remove buckets starting at item and stop removing | |
834 | * when a bucket is in use. | |
835 | * | |
836 | * @param item id to remove | |
837 | * @param unused true if only unused items should be removed | |
838 | * @return 0 on success, negative on error | |
839 | */ | |
840 | int remove_root(int item, bool unused); | |
841 | ||
842 | /** | |
843 | * remove all instances of an item nested beneath a certain point from the map | |
844 | * | |
845 | * @param cct cct | |
846 | * @param id item id to remove | |
847 | * @param ancestor ancestor item id under which to search for id | |
848 | * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty) | |
849 | * @return 0 on success, negative on error | |
850 | */ | |
851 | private: | |
852 | bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only); | |
853 | int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only); | |
854 | bool _bucket_is_in_use(int id); | |
855 | public: | |
856 | int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only); | |
857 | ||
858 | /** | |
859 | * calculate the locality/distance from a given id to a crush location map | |
860 | * | |
861 | * Specifically, we look for the lowest-valued type for which the | |
862 | * location of id matches that described in loc. | |
863 | * | |
864 | * @param cct cct | |
865 | * @param id the existing id in the map | |
866 | * @param loc a set of key=value pairs describing a location in the hierarchy | |
867 | */ | |
868 | int get_common_ancestor_distance(CephContext *cct, int id, | |
869 | const std::multimap<string,string>& loc); | |
870 | ||
871 | /** | |
872 | * parse a set of key/value pairs out of a string vector | |
873 | * | |
874 | * These are used to describe a location in the CRUSH hierarchy. | |
875 | * | |
876 | * @param args list of strings (each key= or key=value) | |
877 | * @param ploc pointer to a resulting location map or multimap | |
878 | */ | |
879 | static int parse_loc_map(const std::vector<string>& args, | |
880 | std::map<string,string> *ploc); | |
881 | static int parse_loc_multimap(const std::vector<string>& args, | |
882 | std::multimap<string,string> *ploc); | |
883 | ||
884 | /** | |
885 | * get an item's weight | |
886 | * | |
887 | * Will return the weight for the first instance it finds. | |
888 | * | |
889 | * @param id item id to check | |
890 | * @return weight of item | |
891 | */ | |
892 | int get_item_weight(int id) const; | |
893 | float get_item_weightf(int id) const { | |
894 | return (float)get_item_weight(id) / (float)0x10000; | |
895 | } | |
896 | int get_item_weight_in_loc(int id, const map<string,string> &loc); | |
897 | float get_item_weightf_in_loc(int id, const map<string,string> &loc) { | |
898 | return (float)get_item_weight_in_loc(id, loc) / (float)0x10000; | |
899 | } | |
900 | ||
224ce89b WB |
901 | int validate_weightf(float weight) { |
902 | uint64_t iweight = weight * 0x10000; | |
903 | if (iweight > std::numeric_limits<int>::max()) { | |
904 | return -EOVERFLOW; | |
905 | } | |
906 | return 0; | |
907 | } | |
7c673cae FG |
908 | int adjust_item_weight(CephContext *cct, int id, int weight); |
909 | int adjust_item_weightf(CephContext *cct, int id, float weight) { | |
224ce89b WB |
910 | int r = validate_weightf(weight); |
911 | if (r < 0) { | |
912 | return r; | |
913 | } | |
7c673cae FG |
914 | return adjust_item_weight(cct, id, (int)(weight * (float)0x10000)); |
915 | } | |
916 | int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc); | |
917 | int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map<string,string>& loc) { | |
224ce89b WB |
918 | int r = validate_weightf(weight); |
919 | if (r < 0) { | |
920 | return r; | |
921 | } | |
7c673cae FG |
922 | return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc); |
923 | } | |
924 | void reweight(CephContext *cct); | |
925 | ||
926 | int adjust_subtree_weight(CephContext *cct, int id, int weight); | |
927 | int adjust_subtree_weightf(CephContext *cct, int id, float weight) { | |
224ce89b WB |
928 | int r = validate_weightf(weight); |
929 | if (r < 0) { | |
930 | return r; | |
931 | } | |
7c673cae FG |
932 | return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000)); |
933 | } | |
934 | ||
935 | /// check if item id is present in the map hierarchy | |
936 | bool check_item_present(int id) const; | |
937 | ||
938 | ||
939 | /*** devices ***/ | |
940 | int get_max_devices() const { | |
941 | if (!crush) return 0; | |
942 | return crush->max_devices; | |
943 | } | |
944 | ||
945 | ||
946 | /*** rules ***/ | |
947 | private: | |
948 | crush_rule *get_rule(unsigned ruleno) const { | |
949 | if (!crush) return (crush_rule *)(-ENOENT); | |
950 | if (ruleno >= crush->max_rules) | |
951 | return 0; | |
952 | return crush->rules[ruleno]; | |
953 | } | |
954 | crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const { | |
955 | crush_rule *n = get_rule(ruleno); | |
956 | if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL); | |
957 | if (step >= n->len) return (crush_rule_step *)(-EINVAL); | |
958 | return &n->steps[step]; | |
959 | } | |
960 | ||
961 | public: | |
962 | /* accessors */ | |
963 | int get_max_rules() const { | |
964 | if (!crush) return 0; | |
965 | return crush->max_rules; | |
966 | } | |
967 | bool rule_exists(unsigned ruleno) const { | |
968 | if (!crush) return false; | |
969 | if (ruleno < crush->max_rules && | |
970 | crush->rules[ruleno] != NULL) | |
971 | return true; | |
972 | return false; | |
973 | } | |
974 | int get_rule_len(unsigned ruleno) const { | |
975 | crush_rule *r = get_rule(ruleno); | |
976 | if (IS_ERR(r)) return PTR_ERR(r); | |
977 | return r->len; | |
978 | } | |
979 | int get_rule_mask_ruleset(unsigned ruleno) const { | |
980 | crush_rule *r = get_rule(ruleno); | |
981 | if (IS_ERR(r)) return -1; | |
982 | return r->mask.ruleset; | |
983 | } | |
984 | int get_rule_mask_type(unsigned ruleno) const { | |
985 | crush_rule *r = get_rule(ruleno); | |
986 | if (IS_ERR(r)) return -1; | |
987 | return r->mask.type; | |
988 | } | |
989 | int get_rule_mask_min_size(unsigned ruleno) const { | |
990 | crush_rule *r = get_rule(ruleno); | |
991 | if (IS_ERR(r)) return -1; | |
992 | return r->mask.min_size; | |
993 | } | |
994 | int get_rule_mask_max_size(unsigned ruleno) const { | |
995 | crush_rule *r = get_rule(ruleno); | |
996 | if (IS_ERR(r)) return -1; | |
997 | return r->mask.max_size; | |
998 | } | |
999 | int get_rule_op(unsigned ruleno, unsigned step) const { | |
1000 | crush_rule_step *s = get_rule_step(ruleno, step); | |
1001 | if (IS_ERR(s)) return PTR_ERR(s); | |
1002 | return s->op; | |
1003 | } | |
1004 | int get_rule_arg1(unsigned ruleno, unsigned step) const { | |
1005 | crush_rule_step *s = get_rule_step(ruleno, step); | |
1006 | if (IS_ERR(s)) return PTR_ERR(s); | |
1007 | return s->arg1; | |
1008 | } | |
1009 | int get_rule_arg2(unsigned ruleno, unsigned step) const { | |
1010 | crush_rule_step *s = get_rule_step(ruleno, step); | |
1011 | if (IS_ERR(s)) return PTR_ERR(s); | |
1012 | return s->arg2; | |
1013 | } | |
1014 | ||
1015 | /** | |
1016 | * calculate a map of osds to weights for a given rule | |
1017 | * | |
1018 | * Generate a map of which OSDs get how much relative weight for a | |
1019 | * given rule. | |
1020 | * | |
1021 | * @param ruleno [in] rule id | |
1022 | * @param pmap [out] map of osd to weight | |
1023 | * @return 0 for success, or negative error code | |
1024 | */ | |
1025 | int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap); | |
1026 | ||
1027 | /* modifiers */ | |
c07f9fc5 FG |
1028 | |
1029 | int add_rule(int ruleno, int len, int type, int minsize, int maxsize) { | |
7c673cae | 1030 | if (!crush) return -ENOENT; |
c07f9fc5 | 1031 | crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize); |
7c673cae FG |
1032 | assert(n); |
1033 | ruleno = crush_add_rule(crush, n, ruleno); | |
1034 | return ruleno; | |
1035 | } | |
1036 | int set_rule_mask_max_size(unsigned ruleno, int max_size) { | |
1037 | crush_rule *r = get_rule(ruleno); | |
1038 | if (IS_ERR(r)) return -1; | |
1039 | return r->mask.max_size = max_size; | |
1040 | } | |
1041 | int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) { | |
1042 | if (!crush) return -ENOENT; | |
1043 | crush_rule *n = get_rule(ruleno); | |
1044 | if (!n) return -1; | |
1045 | crush_rule_set_step(n, step, op, arg1, arg2); | |
1046 | return 0; | |
1047 | } | |
1048 | int set_rule_step_take(unsigned ruleno, unsigned step, int val) { | |
1049 | return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0); | |
1050 | } | |
1051 | int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) { | |
1052 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0); | |
1053 | } | |
1054 | int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) { | |
1055 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0); | |
1056 | } | |
1057 | int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) { | |
1058 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0); | |
1059 | } | |
1060 | int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) { | |
1061 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0); | |
1062 | } | |
1063 | int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) { | |
1064 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0); | |
1065 | } | |
1066 | int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) { | |
1067 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0); | |
1068 | } | |
1069 | int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) { | |
1070 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type); | |
1071 | } | |
1072 | int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) { | |
1073 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type); | |
1074 | } | |
1075 | int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) { | |
1076 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type); | |
1077 | } | |
1078 | int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) { | |
1079 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type); | |
1080 | } | |
1081 | int set_rule_step_emit(unsigned ruleno, unsigned step) { | |
1082 | return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0); | |
1083 | } | |
1084 | ||
31f18b77 FG |
1085 | int add_simple_rule( |
1086 | string name, string root_name, string failure_domain_type, | |
224ce89b | 1087 | string device_class, |
31f18b77 FG |
1088 | string mode, int rule_type, ostream *err = 0); |
1089 | ||
7c673cae | 1090 | /** |
31f18b77 | 1091 | * @param rno rule[set] id to use, -1 to pick the lowest available |
7c673cae | 1092 | */ |
31f18b77 FG |
1093 | int add_simple_rule_at( |
1094 | string name, string root_name, | |
224ce89b | 1095 | string failure_domain_type, string device_class, string mode, |
31f18b77 | 1096 | int rule_type, int rno, ostream *err = 0); |
7c673cae FG |
1097 | |
1098 | int remove_rule(int ruleno); | |
1099 | ||
1100 | ||
1101 | /** buckets **/ | |
7c673cae FG |
1102 | const crush_bucket *get_bucket(int id) const { |
1103 | if (!crush) | |
1104 | return (crush_bucket *)(-EINVAL); | |
1105 | unsigned int pos = (unsigned int)(-1 - id); | |
1106 | unsigned int max_buckets = crush->max_buckets; | |
1107 | if (pos >= max_buckets) | |
1108 | return (crush_bucket *)(-ENOENT); | |
1109 | crush_bucket *ret = crush->buckets[pos]; | |
1110 | if (ret == NULL) | |
1111 | return (crush_bucket *)(-ENOENT); | |
1112 | return ret; | |
1113 | } | |
c07f9fc5 | 1114 | private: |
7c673cae FG |
1115 | crush_bucket *get_bucket(int id) { |
1116 | if (!crush) | |
1117 | return (crush_bucket *)(-EINVAL); | |
1118 | unsigned int pos = (unsigned int)(-1 - id); | |
1119 | unsigned int max_buckets = crush->max_buckets; | |
1120 | if (pos >= max_buckets) | |
1121 | return (crush_bucket *)(-ENOENT); | |
1122 | crush_bucket *ret = crush->buckets[pos]; | |
1123 | if (ret == NULL) | |
1124 | return (crush_bucket *)(-ENOENT); | |
1125 | return ret; | |
1126 | } | |
1127 | /** | |
1128 | * detach a bucket from its parent and adjust the parent weight | |
1129 | * | |
1130 | * returns the weight of the detached bucket | |
1131 | **/ | |
c07f9fc5 | 1132 | int detach_bucket(CephContext *cct, int item); |
7c673cae FG |
1133 | |
1134 | public: | |
1135 | int get_max_buckets() const { | |
1136 | if (!crush) return -EINVAL; | |
1137 | return crush->max_buckets; | |
1138 | } | |
1139 | int get_next_bucket_id() const { | |
1140 | if (!crush) return -EINVAL; | |
1141 | return crush_get_next_bucket_id(crush); | |
1142 | } | |
1143 | bool bucket_exists(int id) const { | |
1144 | const crush_bucket *b = get_bucket(id); | |
1145 | if (IS_ERR(b)) | |
1146 | return false; | |
1147 | return true; | |
1148 | } | |
1149 | int get_bucket_weight(int id) const { | |
1150 | const crush_bucket *b = get_bucket(id); | |
1151 | if (IS_ERR(b)) return PTR_ERR(b); | |
1152 | return b->weight; | |
1153 | } | |
1154 | float get_bucket_weightf(int id) const { | |
1155 | const crush_bucket *b = get_bucket(id); | |
1156 | if (IS_ERR(b)) return 0; | |
1157 | return b->weight / (float)0x10000; | |
1158 | } | |
1159 | int get_bucket_type(int id) const { | |
1160 | const crush_bucket *b = get_bucket(id); | |
1161 | if (IS_ERR(b)) return PTR_ERR(b); | |
1162 | return b->type; | |
1163 | } | |
1164 | int get_bucket_alg(int id) const { | |
1165 | const crush_bucket *b = get_bucket(id); | |
1166 | if (IS_ERR(b)) return PTR_ERR(b); | |
1167 | return b->alg; | |
1168 | } | |
1169 | int get_bucket_hash(int id) const { | |
1170 | const crush_bucket *b = get_bucket(id); | |
1171 | if (IS_ERR(b)) return PTR_ERR(b); | |
1172 | return b->hash; | |
1173 | } | |
1174 | int get_bucket_size(int id) const { | |
1175 | const crush_bucket *b = get_bucket(id); | |
1176 | if (IS_ERR(b)) return PTR_ERR(b); | |
1177 | return b->size; | |
1178 | } | |
1179 | int get_bucket_item(int id, int pos) const { | |
1180 | const crush_bucket *b = get_bucket(id); | |
1181 | if (IS_ERR(b)) return PTR_ERR(b); | |
1182 | if ((__u32)pos >= b->size) | |
1183 | return PTR_ERR(b); | |
1184 | return b->items[pos]; | |
1185 | } | |
1186 | int get_bucket_item_weight(int id, int pos) const { | |
1187 | const crush_bucket *b = get_bucket(id); | |
1188 | if (IS_ERR(b)) return PTR_ERR(b); | |
1189 | return crush_get_bucket_item_weight(b, pos); | |
1190 | } | |
1191 | float get_bucket_item_weightf(int id, int pos) const { | |
1192 | const crush_bucket *b = get_bucket(id); | |
1193 | if (IS_ERR(b)) return 0; | |
1194 | return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000; | |
1195 | } | |
1196 | ||
1197 | /* modifiers */ | |
1198 | int add_bucket(int bucketno, int alg, int hash, int type, int size, | |
c07f9fc5 | 1199 | int *items, int *weights, int *idout); |
31f18b77 FG |
1200 | int bucket_add_item(crush_bucket *bucket, int item, int weight); |
1201 | int bucket_remove_item(struct crush_bucket *bucket, int item); | |
1202 | int bucket_adjust_item_weight(CephContext *cct, struct crush_bucket *bucket, int item, int weight); | |
1203 | ||
7c673cae FG |
1204 | void finalize() { |
1205 | assert(crush); | |
1206 | crush_finalize(crush); | |
31f18b77 | 1207 | have_uniform_rules = !has_legacy_rulesets(); |
7c673cae FG |
1208 | } |
1209 | ||
224ce89b | 1210 | int update_device_class(int id, const string& class_name, const string& name, ostream *ss); |
c07f9fc5 | 1211 | int remove_device_class(CephContext *cct, int id, ostream *ss); |
d2e6a577 FG |
1212 | int device_class_clone( |
1213 | int original, int device_class, | |
1214 | const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket, | |
1215 | const std::set<int32_t>& used_ids, | |
1216 | int *clone); | |
1217 | int populate_classes( | |
1218 | const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket); | |
1219 | bool _class_is_dead(int class_id); | |
1220 | void cleanup_dead_classes(); | |
7c673cae FG |
1221 | int rebuild_roots_with_classes(); |
1222 | /* remove unused roots generated for class devices */ | |
1223 | int trim_roots_with_class(bool unused); | |
7c673cae FG |
1224 | |
1225 | void start_choose_profile() { | |
1226 | free(crush->choose_tries); | |
1227 | /* | |
1228 | * the original choose_total_tries value was off by one (it | |
1229 | * counted "retries" and not "tries"). add one to alloc. | |
1230 | */ | |
c07f9fc5 FG |
1231 | crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries), |
1232 | (crush->choose_total_tries + 1)); | |
7c673cae FG |
1233 | memset(crush->choose_tries, 0, |
1234 | sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1)); | |
1235 | } | |
1236 | void stop_choose_profile() { | |
1237 | free(crush->choose_tries); | |
1238 | crush->choose_tries = 0; | |
1239 | } | |
1240 | ||
1241 | int get_choose_profile(__u32 **vec) { | |
1242 | if (crush->choose_tries) { | |
1243 | *vec = crush->choose_tries; | |
1244 | return crush->choose_total_tries; | |
1245 | } | |
1246 | return 0; | |
1247 | } | |
1248 | ||
1249 | ||
1250 | void set_max_devices(int m) { | |
1251 | crush->max_devices = m; | |
1252 | } | |
1253 | ||
1254 | int find_rule(int ruleset, int type, int size) const { | |
1255 | if (!crush) return -1; | |
31f18b77 FG |
1256 | if (!have_uniform_rules) { |
1257 | return crush_find_rule(crush, ruleset, type, size); | |
1258 | } else { | |
1259 | if (ruleset < (int)crush->max_rules && | |
1260 | crush->rules[ruleset]) | |
1261 | return ruleset; | |
1262 | return -1; | |
1263 | } | |
7c673cae FG |
1264 | } |
1265 | ||
d2e6a577 | 1266 | bool ruleset_exists(const int ruleset) const { |
7c673cae FG |
1267 | for (size_t i = 0; i < crush->max_rules; ++i) { |
1268 | if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) { | |
1269 | return true; | |
1270 | } | |
1271 | } | |
1272 | ||
1273 | return false; | |
1274 | } | |
1275 | ||
1276 | /** | |
1277 | * Return the lowest numbered ruleset of type `type` | |
1278 | * | |
1279 | * @returns a ruleset ID, or -1 if no matching rulesets found. | |
1280 | */ | |
1281 | int find_first_ruleset(int type) const { | |
1282 | int result = -1; | |
1283 | ||
1284 | for (size_t i = 0; i < crush->max_rules; ++i) { | |
1285 | if (crush->rules[i] | |
1286 | && crush->rules[i]->mask.type == type | |
1287 | && (crush->rules[i]->mask.ruleset < result || result == -1)) { | |
1288 | result = crush->rules[i]->mask.ruleset; | |
1289 | } | |
1290 | } | |
1291 | ||
1292 | return result; | |
1293 | } | |
1294 | ||
c07f9fc5 FG |
1295 | bool have_choose_args(int64_t choose_args_index) const { |
1296 | return choose_args.count(choose_args_index); | |
1297 | } | |
1298 | ||
1299 | crush_choose_arg_map choose_args_get_with_fallback( | |
1300 | int64_t choose_args_index) const { | |
1301 | auto i = choose_args.find(choose_args_index); | |
1302 | if (i == choose_args.end()) { | |
1303 | i = choose_args.find(DEFAULT_CHOOSE_ARGS); | |
1304 | } | |
1305 | if (i == choose_args.end()) { | |
1306 | crush_choose_arg_map arg_map; | |
1307 | arg_map.args = NULL; | |
1308 | arg_map.size = 0; | |
1309 | return arg_map; | |
1310 | } else { | |
1311 | return i->second; | |
1312 | } | |
1313 | } | |
1314 | crush_choose_arg_map choose_args_get(int64_t choose_args_index) const { | |
7c673cae FG |
1315 | auto i = choose_args.find(choose_args_index); |
1316 | if (i == choose_args.end()) { | |
1317 | crush_choose_arg_map arg_map; | |
1318 | arg_map.args = NULL; | |
1319 | arg_map.size = 0; | |
1320 | return arg_map; | |
1321 | } else { | |
1322 | return i->second; | |
1323 | } | |
1324 | } | |
1325 | ||
1326 | void destroy_choose_args(crush_choose_arg_map arg_map) { | |
1327 | for (__u32 i = 0; i < arg_map.size; i++) { | |
1328 | crush_choose_arg *arg = &arg_map.args[i]; | |
1329 | for (__u32 j = 0; j < arg->weight_set_size; j++) { | |
1330 | crush_weight_set *weight_set = &arg->weight_set[j]; | |
1331 | free(weight_set->weights); | |
1332 | } | |
1333 | if (arg->weight_set) | |
1334 | free(arg->weight_set); | |
1335 | if (arg->ids) | |
1336 | free(arg->ids); | |
1337 | } | |
1338 | free(arg_map.args); | |
1339 | } | |
c07f9fc5 FG |
1340 | |
1341 | void create_choose_args(int64_t id, int positions) { | |
1342 | if (choose_args.count(id)) | |
1343 | return; | |
1344 | assert(positions); | |
1345 | auto &cmap = choose_args[id]; | |
1346 | cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg), | |
1347 | crush->max_buckets); | |
1348 | cmap.size = crush->max_buckets; | |
1349 | for (int bidx=0; bidx < crush->max_buckets; ++bidx) { | |
1350 | crush_bucket *b = crush->buckets[bidx]; | |
1351 | auto &carg = cmap.args[bidx]; | |
1352 | carg.ids = NULL; | |
1353 | carg.ids_size = 0; | |
1354 | if (b && b->alg == CRUSH_BUCKET_STRAW2) { | |
1355 | crush_bucket_straw2 *sb = (crush_bucket_straw2*)b; | |
1356 | carg.weight_set_size = positions; | |
1357 | carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set), | |
1358 | carg.weight_set_size); | |
1359 | // initialize with canonical weights | |
1360 | for (int pos = 0; pos < positions; ++pos) { | |
1361 | carg.weight_set[pos].size = b->size; | |
1362 | carg.weight_set[pos].weights = (__u32*)calloc(4, b->size); | |
1363 | for (unsigned i = 0; i < b->size; ++i) { | |
1364 | carg.weight_set[pos].weights[i] = sb->item_weights[i]; | |
1365 | } | |
1366 | } | |
1367 | } else { | |
1368 | carg.weight_set = NULL; | |
1369 | carg.weight_set_size = 0; | |
1370 | } | |
1371 | } | |
1372 | } | |
1373 | ||
1374 | void rm_choose_args(int64_t id) { | |
1375 | auto p = choose_args.find(id); | |
1376 | if (p != choose_args.end()) { | |
1377 | destroy_choose_args(p->second); | |
1378 | choose_args.erase(p); | |
1379 | } | |
1380 | } | |
1381 | ||
7c673cae FG |
1382 | void choose_args_clear() { |
1383 | for (auto w : choose_args) | |
1384 | destroy_choose_args(w.second); | |
1385 | choose_args.clear(); | |
1386 | } | |
1387 | ||
c07f9fc5 FG |
1388 | // adjust choose_args_map weight, preserving the hierarchical summation |
1389 | // property. used by callers optimizing layouts by tweaking weights. | |
1390 | int _choose_args_adjust_item_weight_in_bucket( | |
1391 | CephContext *cct, | |
1392 | crush_choose_arg_map cmap, | |
1393 | int bucketid, | |
1394 | int id, | |
1395 | const vector<int>& weight, | |
1396 | ostream *ss); | |
1397 | int choose_args_adjust_item_weight( | |
1398 | CephContext *cct, | |
1399 | crush_choose_arg_map cmap, | |
1400 | int id, const vector<int>& weight, | |
1401 | ostream *ss); | |
1402 | int choose_args_adjust_item_weightf( | |
1403 | CephContext *cct, | |
1404 | crush_choose_arg_map cmap, | |
1405 | int id, const vector<double>& weightf, | |
1406 | ostream *ss) { | |
1407 | vector<int> weight(weightf.size()); | |
1408 | for (unsigned i = 0; i < weightf.size(); ++i) { | |
1409 | weight[i] = (int)(weightf[i] * (float)0x10000); | |
1410 | } | |
1411 | return choose_args_adjust_item_weight(cct, cmap, id, weight, ss); | |
1412 | } | |
1413 | ||
1414 | int get_choose_args_positions(crush_choose_arg_map cmap) { | |
1415 | // infer positions from other buckets | |
1416 | for (unsigned j = 0; j < cmap.size; ++j) { | |
1417 | if (cmap.args[j].weight_set_size) { | |
1418 | return cmap.args[j].weight_set_size; | |
1419 | } | |
1420 | } | |
1421 | return 1; | |
1422 | } | |
1423 | ||
7c673cae FG |
1424 | template<typename WeightVector> |
1425 | void do_rule(int rule, int x, vector<int>& out, int maxout, | |
1426 | const WeightVector& weight, | |
1427 | uint64_t choose_args_index) const { | |
1428 | int rawout[maxout]; | |
1429 | char work[crush_work_size(crush, maxout)]; | |
1430 | crush_init_workspace(crush, work); | |
c07f9fc5 FG |
1431 | crush_choose_arg_map arg_map = choose_args_get_with_fallback( |
1432 | choose_args_index); | |
7c673cae FG |
1433 | int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0], |
1434 | weight.size(), work, arg_map.args); | |
1435 | if (numrep < 0) | |
1436 | numrep = 0; | |
1437 | out.resize(numrep); | |
1438 | for (int i=0; i<numrep; i++) | |
1439 | out[i] = rawout[i]; | |
1440 | } | |
1441 | ||
1442 | int _choose_type_stack( | |
1443 | CephContext *cct, | |
1444 | const vector<pair<int,int>>& stack, | |
1445 | const set<int>& overfull, | |
1446 | const vector<int>& underfull, | |
1447 | const vector<int>& orig, | |
1448 | vector<int>::const_iterator& i, | |
1449 | set<int>& used, | |
1450 | vector<int> *pw) const; | |
1451 | ||
1452 | int try_remap_rule( | |
1453 | CephContext *cct, | |
1454 | int rule, | |
1455 | int maxout, | |
1456 | const set<int>& overfull, | |
1457 | const vector<int>& underfull, | |
1458 | const vector<int>& orig, | |
1459 | vector<int> *out) const; | |
1460 | ||
1461 | bool check_crush_rule(int ruleset, int type, int size, ostream& ss) { | |
1462 | assert(crush); | |
1463 | ||
1464 | __u32 i; | |
1465 | for (i = 0; i < crush->max_rules; i++) { | |
1466 | if (crush->rules[i] && | |
1467 | crush->rules[i]->mask.ruleset == ruleset && | |
1468 | crush->rules[i]->mask.type == type) { | |
1469 | ||
1470 | if (crush->rules[i]->mask.min_size <= size && | |
1471 | crush->rules[i]->mask.max_size >= size) { | |
1472 | return true; | |
1473 | } else if (size < crush->rules[i]->mask.min_size) { | |
1474 | ss << "pool size is smaller than the crush rule min size"; | |
1475 | return false; | |
1476 | } else { | |
1477 | ss << "pool size is bigger than the crush rule max size"; | |
1478 | return false; | |
1479 | } | |
1480 | } | |
1481 | } | |
1482 | ||
1483 | return false; | |
1484 | } | |
1485 | ||
1486 | void encode(bufferlist &bl, uint64_t features) const; | |
1487 | void decode(bufferlist::iterator &blp); | |
1488 | void decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp); | |
1489 | void dump(Formatter *f) const; | |
1490 | void dump_rules(Formatter *f) const; | |
1491 | void dump_rule(int ruleset, Formatter *f) const; | |
1492 | void dump_tunables(Formatter *f) const; | |
1493 | void dump_choose_args(Formatter *f) const; | |
1494 | void list_rules(Formatter *f) const; | |
c07f9fc5 FG |
1495 | void list_rules(ostream *ss) const; |
1496 | void dump_tree(ostream *out, | |
1497 | Formatter *f, | |
1498 | const CrushTreeDumper::name_map_t& ws, | |
1499 | bool show_shadow = false) const; | |
1500 | void dump_tree(ostream *out, Formatter *f) { | |
1501 | dump_tree(out, f, CrushTreeDumper::name_map_t()); | |
1502 | } | |
1503 | void dump_tree(Formatter *f, | |
1504 | const CrushTreeDumper::name_map_t& ws) const; | |
7c673cae FG |
1505 | static void generate_test_instances(list<CrushWrapper*>& o); |
1506 | ||
7c673cae FG |
1507 | int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct); |
1508 | ||
1509 | static bool is_valid_crush_name(const string& s); | |
1510 | static bool is_valid_crush_loc(CephContext *cct, | |
1511 | const map<string,string>& loc); | |
1512 | }; | |
1513 | WRITE_CLASS_ENCODER_FEATURES(CrushWrapper) | |
1514 | ||
1515 | #endif |