]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #ifndef CEPH_CRUSH_WRAPPER_H | |
5 | #define CEPH_CRUSH_WRAPPER_H | |
6 | ||
7 | #include <stdlib.h> | |
8 | #include <map> | |
9 | #include <set> | |
10 | #include <string> | |
11 | ||
12 | #include <iosfwd> | |
13 | ||
14 | #include "include/types.h" | |
15 | ||
16 | extern "C" { | |
17 | #include "crush.h" | |
18 | #include "hash.h" | |
19 | #include "mapper.h" | |
20 | #include "builder.h" | |
21 | } | |
22 | ||
31f18b77 | 23 | #include "include/assert.h" |
7c673cae FG |
24 | #include "include/err.h" |
25 | #include "include/encoding.h" | |
c07f9fc5 | 26 | #include "include/mempool.h" |
7c673cae FG |
27 | |
28 | #include "common/Mutex.h" | |
29 | ||
7c673cae FG |
30 | #define BUG_ON(x) assert(!(x)) |
31 | ||
32 | namespace ceph { | |
33 | class Formatter; | |
34 | } | |
35 | ||
c07f9fc5 FG |
36 | namespace CrushTreeDumper { |
37 | typedef mempool::osdmap::map<int64_t,string> name_map_t; | |
38 | } | |
39 | ||
7c673cae FG |
40 | WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's |
41 | ||
42 | inline static void encode(const crush_rule_step &s, bufferlist &bl) | |
43 | { | |
44 | ::encode(s.op, bl); | |
45 | ::encode(s.arg1, bl); | |
46 | ::encode(s.arg2, bl); | |
47 | } | |
48 | inline static void decode(crush_rule_step &s, bufferlist::iterator &p) | |
49 | { | |
50 | ::decode(s.op, p); | |
51 | ::decode(s.arg1, p); | |
52 | ::decode(s.arg2, p); | |
53 | } | |
54 | ||
55 | using namespace std; | |
56 | class CrushWrapper { | |
57 | public: | |
c07f9fc5 FG |
58 | // magic value used by OSDMap for a "default" fallback choose_args, used if |
59 | // the choose_arg_map passed to do_rule does not exist. if this also | |
60 | // doesn't exist, fall back to canonical weights. | |
61 | enum { | |
62 | DEFAULT_CHOOSE_ARGS = -1 | |
63 | }; | |
64 | ||
7c673cae FG |
65 | std::map<int32_t, string> type_map; /* bucket/device type names */ |
66 | std::map<int32_t, string> name_map; /* bucket/device names */ | |
67 | std::map<int32_t, string> rule_name_map; | |
d2e6a577 | 68 | |
7c673cae FG |
69 | std::map<int32_t, int32_t> class_map; /* item id -> class id */ |
70 | std::map<int32_t, string> class_name; /* class id -> class name */ | |
71 | std::map<string, int32_t> class_rname; /* class name -> class id */ | |
72 | std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */ | |
c07f9fc5 | 73 | std::map<int64_t, crush_choose_arg_map> choose_args; |
7c673cae FG |
74 | |
75 | private: | |
28e407b8 | 76 | struct crush_map *crush = nullptr; |
31f18b77 FG |
77 | |
78 | bool have_uniform_rules = false; | |
79 | ||
7c673cae | 80 | /* reverse maps */ |
28e407b8 | 81 | mutable bool have_rmaps = false; |
7c673cae FG |
82 | mutable std::map<string, int> type_rmap, name_rmap, rule_name_rmap; |
83 | void build_rmaps() const { | |
84 | if (have_rmaps) return; | |
85 | build_rmap(type_map, type_rmap); | |
86 | build_rmap(name_map, name_rmap); | |
87 | build_rmap(rule_name_map, rule_name_rmap); | |
88 | have_rmaps = true; | |
89 | } | |
90 | void build_rmap(const map<int, string> &f, std::map<string, int> &r) const { | |
91 | r.clear(); | |
92 | for (std::map<int, string>::const_iterator p = f.begin(); p != f.end(); ++p) | |
93 | r[p->second] = p->first; | |
94 | } | |
95 | ||
96 | public: | |
97 | CrushWrapper(const CrushWrapper& other); | |
98 | const CrushWrapper& operator=(const CrushWrapper& other); | |
99 | ||
28e407b8 | 100 | CrushWrapper() { |
7c673cae FG |
101 | create(); |
102 | } | |
103 | ~CrushWrapper() { | |
104 | if (crush) | |
105 | crush_destroy(crush); | |
106 | choose_args_clear(); | |
107 | } | |
108 | ||
109 | crush_map *get_crush_map() { return crush; } | |
110 | ||
111 | /* building */ | |
112 | void create() { | |
113 | if (crush) | |
114 | crush_destroy(crush); | |
115 | crush = crush_create(); | |
116 | choose_args_clear(); | |
117 | assert(crush); | |
118 | have_rmaps = false; | |
119 | ||
120 | set_tunables_default(); | |
121 | } | |
122 | ||
3efd9988 FG |
123 | /** |
124 | * true if any rule has a rule id != its position in the array | |
125 | * | |
126 | * These indicate "ruleset" IDs that were created by older versions | |
127 | * of Ceph. They are cleaned up in renumber_rules so that eventually | |
128 | * we can remove the code for handling them. | |
129 | */ | |
130 | bool has_legacy_rule_ids() const; | |
31f18b77 | 131 | |
3efd9988 FG |
132 | /** |
133 | * fix rules whose ruleid != ruleset | |
134 | * | |
135 | * These rules were created in older versions of Ceph. The concept | |
136 | * of a ruleset no longer exists. | |
137 | * | |
138 | * Return a map of old ID -> new ID. Caller must update OSDMap | |
139 | * to use new IDs. | |
140 | */ | |
141 | std::map<int, int> renumber_rules(); | |
31f18b77 | 142 | |
c07f9fc5 FG |
143 | /// true if any buckets aren't straw2 |
144 | bool has_non_straw2_buckets() const; | |
145 | ||
7c673cae FG |
146 | // tunables |
147 | void set_tunables_argonaut() { | |
148 | crush->choose_local_tries = 2; | |
149 | crush->choose_local_fallback_tries = 5; | |
150 | crush->choose_total_tries = 19; | |
151 | crush->chooseleaf_descend_once = 0; | |
152 | crush->chooseleaf_vary_r = 0; | |
153 | crush->chooseleaf_stable = 0; | |
154 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
155 | } | |
156 | void set_tunables_bobtail() { | |
157 | crush->choose_local_tries = 0; | |
158 | crush->choose_local_fallback_tries = 0; | |
159 | crush->choose_total_tries = 50; | |
160 | crush->chooseleaf_descend_once = 1; | |
161 | crush->chooseleaf_vary_r = 0; | |
162 | crush->chooseleaf_stable = 0; | |
163 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
164 | } | |
165 | void set_tunables_firefly() { | |
166 | crush->choose_local_tries = 0; | |
167 | crush->choose_local_fallback_tries = 0; | |
168 | crush->choose_total_tries = 50; | |
169 | crush->chooseleaf_descend_once = 1; | |
170 | crush->chooseleaf_vary_r = 1; | |
171 | crush->chooseleaf_stable = 0; | |
172 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
173 | } | |
174 | void set_tunables_hammer() { | |
175 | crush->choose_local_tries = 0; | |
176 | crush->choose_local_fallback_tries = 0; | |
177 | crush->choose_total_tries = 50; | |
178 | crush->chooseleaf_descend_once = 1; | |
179 | crush->chooseleaf_vary_r = 1; | |
180 | crush->chooseleaf_stable = 0; | |
181 | crush->allowed_bucket_algs = | |
182 | (1 << CRUSH_BUCKET_UNIFORM) | | |
183 | (1 << CRUSH_BUCKET_LIST) | | |
184 | (1 << CRUSH_BUCKET_STRAW) | | |
185 | (1 << CRUSH_BUCKET_STRAW2); | |
186 | } | |
187 | void set_tunables_jewel() { | |
188 | crush->choose_local_tries = 0; | |
189 | crush->choose_local_fallback_tries = 0; | |
190 | crush->choose_total_tries = 50; | |
191 | crush->chooseleaf_descend_once = 1; | |
192 | crush->chooseleaf_vary_r = 1; | |
193 | crush->chooseleaf_stable = 1; | |
194 | crush->allowed_bucket_algs = | |
195 | (1 << CRUSH_BUCKET_UNIFORM) | | |
196 | (1 << CRUSH_BUCKET_LIST) | | |
197 | (1 << CRUSH_BUCKET_STRAW) | | |
198 | (1 << CRUSH_BUCKET_STRAW2); | |
199 | } | |
200 | ||
201 | void set_tunables_legacy() { | |
202 | set_tunables_argonaut(); | |
203 | crush->straw_calc_version = 0; | |
204 | } | |
205 | void set_tunables_optimal() { | |
206 | set_tunables_jewel(); | |
207 | crush->straw_calc_version = 1; | |
208 | } | |
209 | void set_tunables_default() { | |
31f18b77 | 210 | set_tunables_jewel(); |
7c673cae FG |
211 | crush->straw_calc_version = 1; |
212 | } | |
213 | ||
214 | int get_choose_local_tries() const { | |
215 | return crush->choose_local_tries; | |
216 | } | |
217 | void set_choose_local_tries(int n) { | |
218 | crush->choose_local_tries = n; | |
219 | } | |
220 | ||
221 | int get_choose_local_fallback_tries() const { | |
222 | return crush->choose_local_fallback_tries; | |
223 | } | |
224 | void set_choose_local_fallback_tries(int n) { | |
225 | crush->choose_local_fallback_tries = n; | |
226 | } | |
227 | ||
228 | int get_choose_total_tries() const { | |
229 | return crush->choose_total_tries; | |
230 | } | |
231 | void set_choose_total_tries(int n) { | |
232 | crush->choose_total_tries = n; | |
233 | } | |
234 | ||
235 | int get_chooseleaf_descend_once() const { | |
236 | return crush->chooseleaf_descend_once; | |
237 | } | |
238 | void set_chooseleaf_descend_once(int n) { | |
239 | crush->chooseleaf_descend_once = !!n; | |
240 | } | |
241 | ||
242 | int get_chooseleaf_vary_r() const { | |
243 | return crush->chooseleaf_vary_r; | |
244 | } | |
245 | void set_chooseleaf_vary_r(int n) { | |
246 | crush->chooseleaf_vary_r = n; | |
247 | } | |
248 | ||
249 | int get_chooseleaf_stable() const { | |
250 | return crush->chooseleaf_stable; | |
251 | } | |
252 | void set_chooseleaf_stable(int n) { | |
253 | crush->chooseleaf_stable = n; | |
254 | } | |
255 | ||
256 | int get_straw_calc_version() const { | |
257 | return crush->straw_calc_version; | |
258 | } | |
259 | void set_straw_calc_version(int n) { | |
260 | crush->straw_calc_version = n; | |
261 | } | |
262 | ||
263 | unsigned get_allowed_bucket_algs() const { | |
264 | return crush->allowed_bucket_algs; | |
265 | } | |
266 | void set_allowed_bucket_algs(unsigned n) { | |
267 | crush->allowed_bucket_algs = n; | |
268 | } | |
269 | ||
270 | bool has_argonaut_tunables() const { | |
271 | return | |
272 | crush->choose_local_tries == 2 && | |
273 | crush->choose_local_fallback_tries == 5 && | |
274 | crush->choose_total_tries == 19 && | |
275 | crush->chooseleaf_descend_once == 0 && | |
276 | crush->chooseleaf_vary_r == 0 && | |
277 | crush->chooseleaf_stable == 0 && | |
278 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
279 | } | |
280 | bool has_bobtail_tunables() const { | |
281 | return | |
282 | crush->choose_local_tries == 0 && | |
283 | crush->choose_local_fallback_tries == 0 && | |
284 | crush->choose_total_tries == 50 && | |
285 | crush->chooseleaf_descend_once == 1 && | |
286 | crush->chooseleaf_vary_r == 0 && | |
287 | crush->chooseleaf_stable == 0 && | |
288 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
289 | } | |
290 | bool has_firefly_tunables() const { | |
291 | return | |
292 | crush->choose_local_tries == 0 && | |
293 | crush->choose_local_fallback_tries == 0 && | |
294 | crush->choose_total_tries == 50 && | |
295 | crush->chooseleaf_descend_once == 1 && | |
296 | crush->chooseleaf_vary_r == 1 && | |
297 | crush->chooseleaf_stable == 0 && | |
298 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
299 | } | |
300 | bool has_hammer_tunables() const { | |
301 | return | |
302 | crush->choose_local_tries == 0 && | |
303 | crush->choose_local_fallback_tries == 0 && | |
304 | crush->choose_total_tries == 50 && | |
305 | crush->chooseleaf_descend_once == 1 && | |
306 | crush->chooseleaf_vary_r == 1 && | |
307 | crush->chooseleaf_stable == 0 && | |
308 | crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) | | |
309 | (1 << CRUSH_BUCKET_LIST) | | |
310 | (1 << CRUSH_BUCKET_STRAW) | | |
311 | (1 << CRUSH_BUCKET_STRAW2)); | |
312 | } | |
313 | bool has_jewel_tunables() const { | |
314 | return | |
315 | crush->choose_local_tries == 0 && | |
316 | crush->choose_local_fallback_tries == 0 && | |
317 | crush->choose_total_tries == 50 && | |
318 | crush->chooseleaf_descend_once == 1 && | |
319 | crush->chooseleaf_vary_r == 1 && | |
320 | crush->chooseleaf_stable == 1 && | |
321 | crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) | | |
322 | (1 << CRUSH_BUCKET_LIST) | | |
323 | (1 << CRUSH_BUCKET_STRAW) | | |
324 | (1 << CRUSH_BUCKET_STRAW2)); | |
325 | } | |
326 | ||
327 | bool has_optimal_tunables() const { | |
328 | return has_jewel_tunables(); | |
329 | } | |
330 | bool has_legacy_tunables() const { | |
331 | return has_argonaut_tunables(); | |
332 | } | |
333 | ||
334 | bool has_nondefault_tunables() const { | |
335 | return | |
336 | (crush->choose_local_tries != 2 || | |
337 | crush->choose_local_fallback_tries != 5 || | |
338 | crush->choose_total_tries != 19); | |
339 | } | |
340 | bool has_nondefault_tunables2() const { | |
341 | return | |
342 | crush->chooseleaf_descend_once != 0; | |
343 | } | |
344 | bool has_nondefault_tunables3() const { | |
345 | return | |
346 | crush->chooseleaf_vary_r != 0; | |
347 | } | |
348 | bool has_nondefault_tunables5() const { | |
349 | return | |
350 | crush->chooseleaf_stable != 0; | |
351 | } | |
352 | ||
353 | bool has_v2_rules() const; | |
354 | bool has_v3_rules() const; | |
355 | bool has_v4_buckets() const; | |
356 | bool has_v5_rules() const; | |
31f18b77 FG |
357 | bool has_choose_args() const; // any choose_args |
358 | bool has_incompat_choose_args() const; // choose_args that can't be made compat | |
7c673cae FG |
359 | |
360 | bool is_v2_rule(unsigned ruleid) const; | |
361 | bool is_v3_rule(unsigned ruleid) const; | |
362 | bool is_v5_rule(unsigned ruleid) const; | |
363 | ||
364 | string get_min_required_version() const { | |
365 | if (has_v5_rules() || has_nondefault_tunables5()) | |
366 | return "jewel"; | |
367 | else if (has_v4_buckets()) | |
368 | return "hammer"; | |
369 | else if (has_nondefault_tunables3()) | |
370 | return "firefly"; | |
371 | else if (has_nondefault_tunables2() || has_nondefault_tunables()) | |
372 | return "bobtail"; | |
373 | else | |
374 | return "argonaut"; | |
375 | } | |
376 | ||
377 | // default bucket types | |
378 | unsigned get_default_bucket_alg() const { | |
379 | // in order of preference | |
380 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2)) | |
381 | return CRUSH_BUCKET_STRAW2; | |
382 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW)) | |
383 | return CRUSH_BUCKET_STRAW; | |
384 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE)) | |
385 | return CRUSH_BUCKET_TREE; | |
386 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST)) | |
387 | return CRUSH_BUCKET_LIST; | |
388 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM)) | |
389 | return CRUSH_BUCKET_UNIFORM; | |
390 | return 0; | |
391 | } | |
392 | ||
393 | // bucket types | |
394 | int get_num_type_names() const { | |
395 | return type_map.size(); | |
396 | } | |
31f18b77 FG |
397 | int get_max_type_id() const { |
398 | if (type_map.empty()) | |
399 | return 0; | |
400 | return type_map.rbegin()->first; | |
401 | } | |
7c673cae FG |
402 | int get_type_id(const string& name) const { |
403 | build_rmaps(); | |
404 | if (type_rmap.count(name)) | |
405 | return type_rmap[name]; | |
406 | return -1; | |
407 | } | |
408 | const char *get_type_name(int t) const { | |
409 | std::map<int,string>::const_iterator p = type_map.find(t); | |
410 | if (p != type_map.end()) | |
411 | return p->second.c_str(); | |
412 | return 0; | |
413 | } | |
414 | void set_type_name(int i, const string& name) { | |
415 | type_map[i] = name; | |
416 | if (have_rmaps) | |
417 | type_rmap[name] = i; | |
418 | } | |
419 | ||
420 | // item/bucket names | |
421 | bool name_exists(const string& name) const { | |
422 | build_rmaps(); | |
423 | return name_rmap.count(name); | |
424 | } | |
425 | bool item_exists(int i) const { | |
426 | return name_map.count(i); | |
427 | } | |
428 | int get_item_id(const string& name) const { | |
429 | build_rmaps(); | |
430 | if (name_rmap.count(name)) | |
431 | return name_rmap[name]; | |
432 | return 0; /* hrm */ | |
433 | } | |
434 | const char *get_item_name(int t) const { | |
435 | std::map<int,string>::const_iterator p = name_map.find(t); | |
436 | if (p != name_map.end()) | |
437 | return p->second.c_str(); | |
438 | return 0; | |
439 | } | |
440 | int set_item_name(int i, const string& name) { | |
441 | if (!is_valid_crush_name(name)) | |
442 | return -EINVAL; | |
443 | name_map[i] = name; | |
444 | if (have_rmaps) | |
445 | name_rmap[name] = i; | |
446 | return 0; | |
447 | } | |
31f18b77 FG |
448 | void swap_names(int a, int b) { |
449 | string an = name_map[a]; | |
450 | string bn = name_map[b]; | |
451 | name_map[a] = bn; | |
452 | name_map[b] = an; | |
453 | if (have_rmaps) { | |
454 | name_rmap[an] = b; | |
455 | name_rmap[bn] = a; | |
456 | } | |
457 | } | |
7c673cae FG |
458 | int split_id_class(int i, int *idout, int *classout) const; |
459 | ||
460 | bool class_exists(const string& name) const { | |
461 | return class_rname.count(name); | |
462 | } | |
463 | const char *get_class_name(int i) const { | |
224ce89b | 464 | auto p = class_name.find(i); |
7c673cae FG |
465 | if (p != class_name.end()) |
466 | return p->second.c_str(); | |
467 | return 0; | |
468 | } | |
469 | int get_class_id(const string& name) const { | |
224ce89b | 470 | auto p = class_rname.find(name); |
7c673cae FG |
471 | if (p != class_rname.end()) |
472 | return p->second; | |
473 | else | |
474 | return -EINVAL; | |
475 | } | |
476 | int remove_class_name(const string& name) { | |
224ce89b | 477 | auto p = class_rname.find(name); |
7c673cae FG |
478 | if (p == class_rname.end()) |
479 | return -ENOENT; | |
480 | int class_id = p->second; | |
224ce89b | 481 | auto q = class_name.find(class_id); |
7c673cae FG |
482 | if (q == class_name.end()) |
483 | return -ENOENT; | |
484 | class_rname.erase(name); | |
485 | class_name.erase(class_id); | |
486 | return 0; | |
487 | } | |
224ce89b | 488 | |
224ce89b WB |
489 | int32_t _alloc_class_id() const; |
490 | ||
7c673cae FG |
491 | int get_or_create_class_id(const string& name) { |
492 | int c = get_class_id(name); | |
493 | if (c < 0) { | |
224ce89b | 494 | int i = _alloc_class_id(); |
7c673cae FG |
495 | class_name[i] = name; |
496 | class_rname[name] = i; | |
497 | return i; | |
498 | } else { | |
499 | return c; | |
500 | } | |
501 | } | |
502 | ||
503 | const char *get_item_class(int t) const { | |
504 | std::map<int,int>::const_iterator p = class_map.find(t); | |
505 | if (p == class_map.end()) | |
506 | return 0; | |
507 | return get_class_name(p->second); | |
508 | } | |
509 | int set_item_class(int i, const string& name) { | |
510 | if (!is_valid_crush_name(name)) | |
511 | return -EINVAL; | |
512 | class_map[i] = get_or_create_class_id(name); | |
513 | return 0; | |
514 | } | |
515 | int set_item_class(int i, int c) { | |
516 | class_map[i] = c; | |
517 | return c; | |
518 | } | |
224ce89b WB |
519 | void get_devices_by_class(const string &name, set<int> *devices) const { |
520 | assert(devices); | |
521 | devices->clear(); | |
522 | if (!class_exists(name)) { | |
523 | return; | |
524 | } | |
525 | auto cid = get_class_id(name); | |
526 | for (auto& p : class_map) { | |
527 | if (p.first >= 0 && p.second == cid) { | |
528 | devices->insert(p.first); | |
529 | } | |
530 | } | |
531 | } | |
532 | void class_remove_item(int i) { | |
533 | auto it = class_map.find(i); | |
534 | if (it == class_map.end()) { | |
535 | return; | |
536 | } | |
537 | class_map.erase(it); | |
538 | } | |
7c673cae FG |
539 | int can_rename_item(const string& srcname, |
540 | const string& dstname, | |
541 | ostream *ss) const; | |
542 | int rename_item(const string& srcname, | |
543 | const string& dstname, | |
544 | ostream *ss); | |
545 | int can_rename_bucket(const string& srcname, | |
546 | const string& dstname, | |
547 | ostream *ss) const; | |
548 | int rename_bucket(const string& srcname, | |
549 | const string& dstname, | |
550 | ostream *ss); | |
551 | ||
552 | // rule names | |
b5b8bbf5 FG |
553 | int rename_rule(const string& srcname, |
554 | const string& dstname, | |
555 | ostream *ss); | |
7c673cae FG |
556 | bool rule_exists(string name) const { |
557 | build_rmaps(); | |
558 | return rule_name_rmap.count(name); | |
559 | } | |
560 | int get_rule_id(string name) const { | |
561 | build_rmaps(); | |
562 | if (rule_name_rmap.count(name)) | |
563 | return rule_name_rmap[name]; | |
564 | return -ENOENT; | |
565 | } | |
566 | const char *get_rule_name(int t) const { | |
567 | std::map<int,string>::const_iterator p = rule_name_map.find(t); | |
568 | if (p != rule_name_map.end()) | |
569 | return p->second.c_str(); | |
570 | return 0; | |
571 | } | |
572 | void set_rule_name(int i, const string& name) { | |
573 | rule_name_map[i] = name; | |
574 | if (have_rmaps) | |
575 | rule_name_rmap[name] = i; | |
576 | } | |
c07f9fc5 FG |
577 | bool is_shadow_item(int id) const { |
578 | const char *name = get_item_name(id); | |
579 | return name && !is_valid_crush_name(name); | |
580 | } | |
7c673cae FG |
581 | |
582 | ||
583 | /** | |
584 | * find tree nodes referenced by rules by a 'take' command | |
585 | * | |
586 | * Note that these may not be parentless roots. | |
587 | */ | |
3efd9988 | 588 | void find_takes(set<int> *roots) const; |
28e407b8 | 589 | void find_takes_by_rule(int rule, set<int> *roots) const; |
7c673cae FG |
590 | |
591 | /** | |
592 | * find tree roots | |
593 | * | |
594 | * These are parentless nodes in the map. | |
595 | */ | |
3efd9988 | 596 | void find_roots(set<int> *roots) const; |
7c673cae | 597 | |
c07f9fc5 FG |
598 | |
599 | /** | |
600 | * find tree roots that contain shadow (device class) items only | |
601 | */ | |
3efd9988 | 602 | void find_shadow_roots(set<int> *roots) const { |
c07f9fc5 | 603 | set<int> all; |
3efd9988 | 604 | find_roots(&all); |
c07f9fc5 FG |
605 | for (auto& p: all) { |
606 | if (is_shadow_item(p)) { | |
3efd9988 | 607 | roots->insert(p); |
c07f9fc5 FG |
608 | } |
609 | } | |
610 | } | |
611 | ||
224ce89b WB |
612 | /** |
613 | * find tree roots that are not shadow (device class) items | |
614 | * | |
615 | * These are parentless nodes in the map that are not shadow | |
616 | * items for device classes. | |
617 | */ | |
3efd9988 | 618 | void find_nonshadow_roots(set<int> *roots) const { |
c07f9fc5 | 619 | set<int> all; |
3efd9988 | 620 | find_roots(&all); |
c07f9fc5 FG |
621 | for (auto& p: all) { |
622 | if (!is_shadow_item(p)) { | |
3efd9988 | 623 | roots->insert(p); |
c07f9fc5 FG |
624 | } |
625 | } | |
626 | } | |
224ce89b | 627 | |
7c673cae FG |
628 | /** |
629 | * see if an item is contained within a subtree | |
630 | * | |
631 | * @param root haystack | |
632 | * @param item needle | |
633 | * @return true if the item is located beneath the given node | |
634 | */ | |
635 | bool subtree_contains(int root, int item) const; | |
636 | ||
637 | private: | |
638 | /** | |
639 | * search for an item in any bucket | |
640 | * | |
641 | * @param i item | |
642 | * @return true if present | |
643 | */ | |
644 | bool _search_item_exists(int i) const; | |
645 | public: | |
646 | ||
647 | /** | |
648 | * see if item is located where we think it is | |
649 | * | |
650 | * This verifies that the given item is located at a particular | |
651 | * location in the hierarchy. However, that check is imprecise; we | |
652 | * are actually verifying that the most specific location key/value | |
653 | * is correct. For example, if loc specifies that rack=foo and | |
654 | * host=bar, it will verify that host=bar is correct; any placement | |
655 | * above that level in the hierarchy is ignored. This matches the | |
656 | * semantics for insert_item(). | |
657 | * | |
658 | * @param cct cct | |
659 | * @param item item id | |
660 | * @param loc location to check (map of type to bucket names) | |
661 | * @param weight optional pointer to weight of item at that location | |
662 | * @return true if item is at specified location | |
663 | */ | |
664 | bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, int *iweight); | |
665 | bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, float *weight) { | |
666 | int iweight; | |
667 | bool ret = check_item_loc(cct, item, loc, &iweight); | |
668 | if (weight) | |
669 | *weight = (float)iweight / (float)0x10000; | |
670 | return ret; | |
671 | } | |
672 | ||
673 | ||
674 | /** | |
675 | * returns the (type, name) of the parent bucket of id | |
676 | * | |
677 | * FIXME: ambiguous for items that occur multiple times in the map | |
678 | */ | |
679 | pair<string,string> get_immediate_parent(int id, int *ret = NULL); | |
c07f9fc5 | 680 | |
7c673cae FG |
681 | int get_immediate_parent_id(int id, int *parent) const; |
682 | ||
31f18b77 FG |
683 | /** |
684 | * return ancestor of the given type, or 0 if none | |
28e407b8 | 685 | * can pass in a specific crush **rule** to return ancestor from that rule only |
31f18b77 FG |
686 | * (parent is always a bucket and thus <0) |
687 | */ | |
28e407b8 | 688 | int get_parent_of_type(int id, int type, int rule = -1) const; |
31f18b77 | 689 | |
7c673cae FG |
690 | /** |
691 | * get the fully qualified location of a device by successively finding | |
692 | * parents beginning at ID and ending at highest type number specified in | |
693 | * the CRUSH map which assumes that if device foo is under device bar, the | |
694 | * type_id of foo < bar where type_id is the integer specified in the CRUSH map | |
695 | * | |
696 | * returns the location in the form of (type=foo) where type is a type of bucket | |
697 | * specified in the CRUSH map and foo is a name specified in the CRUSH map | |
698 | */ | |
699 | map<string, string> get_full_location(int id); | |
700 | ||
701 | /* | |
702 | * identical to get_full_location(int id) although it returns the type/name | |
703 | * pairs in the order they occur in the hierarchy. | |
704 | * | |
705 | * returns -ENOENT if id is not found. | |
706 | */ | |
707 | int get_full_location_ordered(int id, vector<pair<string, string> >& path); | |
708 | ||
31f18b77 FG |
709 | /* |
710 | * identical to get_full_location_ordered(int id, vector<pair<string, string> >& path), | |
711 | * although it returns a concatenated string with the type/name pairs in descending | |
712 | * hierarchical order with format key1=val1,key2=val2. | |
713 | * | |
714 | * returns the location in descending hierarchy as a string. | |
715 | */ | |
716 | string get_full_location_ordered_string(int id); | |
717 | ||
7c673cae FG |
718 | /** |
719 | * returns (type_id, type) of all parent buckets between id and | |
720 | * default, can be used to check for anomalous CRUSH maps | |
721 | */ | |
722 | map<int, string> get_parent_hierarchy(int id); | |
723 | ||
724 | /** | |
725 | * enumerate immediate children of given node | |
726 | * | |
727 | * @param id parent bucket or device id | |
728 | * @return number of items, or error | |
729 | */ | |
730 | int get_children(int id, list<int> *children); | |
28e407b8 AA |
731 | void get_children_of_type(int id, |
732 | int type, | |
733 | set<int> *children, | |
734 | bool exclude_shadow = true) const; | |
7c673cae | 735 | |
94b18763 FG |
736 | /** |
737 | * get failure-domain type of a specific crush rule | |
738 | * @param rule_id crush rule id | |
739 | * @return type of failure-domain or a negative errno on error. | |
740 | */ | |
741 | int get_rule_failure_domain(int rule_id); | |
742 | ||
31f18b77 FG |
743 | /** |
744 | * enumerate leaves(devices) of given node | |
745 | * | |
746 | * @param name parent bucket name | |
747 | * @return 0 on success or a negative errno on error. | |
748 | */ | |
749 | int get_leaves(const string &name, set<int> *leaves); | |
750 | int _get_leaves(int id, list<int> *leaves); // worker | |
751 | ||
7c673cae FG |
752 | /** |
753 | * insert an item into the map at a specific position | |
754 | * | |
755 | * Add an item at a specific location in the hierarchy. | |
756 | * Specifically, we look for the most specific location constraint | |
757 | * for which a bucket already exists, and then create intervening | |
758 | * buckets beneath that in order to place the item. | |
759 | * | |
760 | * Note that any location specifiers *above* the most specific match | |
761 | * are ignored. For example, if we specify that osd.12 goes in | |
762 | * host=foo, rack=bar, and row=baz, and rack=bar is the most | |
763 | * specific match, we will create host=foo beneath that point and | |
764 | * put osd.12 inside it. However, we will not verify that rack=bar | |
765 | * is beneath row=baz or move it. | |
766 | * | |
767 | * In short, we will build out a hierarchy, and move leaves around, | |
768 | * but not adjust the hierarchy's internal structure. Yet. | |
769 | * | |
770 | * If the item is already present in the map, we will return EEXIST. | |
771 | * If the location key/value pairs are nonsensical | |
772 | * (rack=nameofdevice), or the location specifiers do not attach us | |
773 | * to any existing part of the hierarchy, we will return EINVAL. | |
774 | * | |
775 | * @param cct cct | |
776 | * @param id item id | |
777 | * @param weight item weight | |
778 | * @param name item name | |
779 | * @param loc location (map of type to bucket names) | |
780 | * @return 0 for success, negative on error | |
781 | */ | |
782 | int insert_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc); | |
783 | ||
784 | /** | |
785 | * move a bucket in the hierarchy to the given location | |
786 | * | |
787 | * This has the same location and ancestor creation behavior as | |
788 | * insert_item(), but will relocate the specified existing bucket. | |
789 | * | |
790 | * @param cct cct | |
791 | * @param id bucket id | |
792 | * @param loc location (map of type to bucket names) | |
793 | * @return 0 for success, negative on error | |
794 | */ | |
795 | int move_bucket(CephContext *cct, int id, const map<string,string>& loc); | |
796 | ||
31f18b77 FG |
797 | /** |
798 | * swap bucket contents of two buckets without touching bucket ids | |
799 | * | |
800 | * @param cct cct | |
801 | * @param src bucket a | |
802 | * @param dst bucket b | |
803 | * @return 0 for success, negative on error | |
804 | */ | |
805 | int swap_bucket(CephContext *cct, int src, int dst); | |
806 | ||
7c673cae FG |
807 | /** |
808 | * add a link to an existing bucket in the hierarchy to the new location | |
809 | * | |
810 | * This has the same location and ancestor creation behavior as | |
811 | * insert_item(), but will add a new link to the specified existing | |
812 | * bucket. | |
813 | * | |
814 | * @param cct cct | |
815 | * @param id bucket id | |
816 | * @param loc location (map of type to bucket names) | |
817 | * @return 0 for success, negative on error | |
818 | */ | |
819 | int link_bucket(CephContext *cct, int id, const map<string,string>& loc); | |
820 | ||
821 | /** | |
822 | * add or update an item's position in the map | |
823 | * | |
824 | * This is analogous to insert_item, except we will move an item if | |
825 | * it is already present. | |
826 | * | |
827 | * @param cct cct | |
828 | * @param id item id | |
829 | * @param weight item weight | |
830 | * @param name item name | |
831 | * @param loc location (map of type to bucket names) | |
832 | * @return 0 for no change, 1 for successful change, negative on error | |
833 | */ | |
834 | int update_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc); | |
835 | ||
836 | /** | |
837 | * create or move an item, but do not adjust its weight if it already exists | |
838 | * | |
839 | * @param cct cct | |
840 | * @param item item id | |
841 | * @param weight initial item weight (if we need to create it) | |
842 | * @param name item name | |
843 | * @param loc location (map of type to bucket names) | |
844 | * @return 0 for no change, 1 for successful change, negative on error | |
845 | */ | |
846 | int create_or_move_item(CephContext *cct, int item, float weight, string name, | |
847 | const map<string,string>& loc); | |
848 | ||
849 | /** | |
850 | * remove all instances of an item from the map | |
851 | * | |
852 | * @param cct cct | |
853 | * @param id item id to remove | |
854 | * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty) | |
855 | * @return 0 on success, negative on error | |
856 | */ | |
857 | int remove_item(CephContext *cct, int id, bool unlink_only); | |
858 | ||
859 | /** | |
860 | * recursively remove buckets starting at item and stop removing | |
861 | * when a bucket is in use. | |
862 | * | |
863 | * @param item id to remove | |
7c673cae FG |
864 | * @return 0 on success, negative on error |
865 | */ | |
35e4c445 | 866 | int remove_root(int item); |
7c673cae FG |
867 | |
868 | /** | |
869 | * remove all instances of an item nested beneath a certain point from the map | |
870 | * | |
871 | * @param cct cct | |
872 | * @param id item id to remove | |
873 | * @param ancestor ancestor item id under which to search for id | |
874 | * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty) | |
875 | * @return 0 on success, negative on error | |
876 | */ | |
877 | private: | |
878 | bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only); | |
879 | int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only); | |
880 | bool _bucket_is_in_use(int id); | |
881 | public: | |
882 | int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only); | |
883 | ||
884 | /** | |
885 | * calculate the locality/distance from a given id to a crush location map | |
886 | * | |
887 | * Specifically, we look for the lowest-valued type for which the | |
888 | * location of id matches that described in loc. | |
889 | * | |
890 | * @param cct cct | |
891 | * @param id the existing id in the map | |
892 | * @param loc a set of key=value pairs describing a location in the hierarchy | |
893 | */ | |
894 | int get_common_ancestor_distance(CephContext *cct, int id, | |
895 | const std::multimap<string,string>& loc); | |
896 | ||
897 | /** | |
898 | * parse a set of key/value pairs out of a string vector | |
899 | * | |
900 | * These are used to describe a location in the CRUSH hierarchy. | |
901 | * | |
902 | * @param args list of strings (each key= or key=value) | |
903 | * @param ploc pointer to a resulting location map or multimap | |
904 | */ | |
905 | static int parse_loc_map(const std::vector<string>& args, | |
906 | std::map<string,string> *ploc); | |
907 | static int parse_loc_multimap(const std::vector<string>& args, | |
908 | std::multimap<string,string> *ploc); | |
909 | ||
910 | /** | |
911 | * get an item's weight | |
912 | * | |
913 | * Will return the weight for the first instance it finds. | |
914 | * | |
915 | * @param id item id to check | |
916 | * @return weight of item | |
917 | */ | |
918 | int get_item_weight(int id) const; | |
919 | float get_item_weightf(int id) const { | |
920 | return (float)get_item_weight(id) / (float)0x10000; | |
921 | } | |
922 | int get_item_weight_in_loc(int id, const map<string,string> &loc); | |
923 | float get_item_weightf_in_loc(int id, const map<string,string> &loc) { | |
924 | return (float)get_item_weight_in_loc(id, loc) / (float)0x10000; | |
925 | } | |
926 | ||
224ce89b WB |
/**
 * check that a floating-point weight fits the integer weight encoding
 *
 * The weight is scaled by 0x10000 and must then truncate to a value in
 * [0, INT_MAX].  The check is done in floating point so that negative,
 * NaN and infinite inputs are rejected deterministically instead of
 * going through an out-of-range float-to-unsigned conversion.
 *
 * @param weight floating-point weight to validate
 * @return 0 if the weight is representable, -EOVERFLOW otherwise
 */
int validate_weightf(float weight) {
  // multiplying by a power of two is exact in double
  const double scaled = (double)weight * 0x10000;
  const double limit = (double)std::numeric_limits<int>::max() + 1.0;
  // written as a negated conjunction so that NaN (all comparisons false)
  // ends up in the error branch
  if (!(scaled > -1.0 && scaled < limit)) {
    return -EOVERFLOW;
  }
  return 0;
}
7c673cae FG |
934 | int adjust_item_weight(CephContext *cct, int id, int weight); |
935 | int adjust_item_weightf(CephContext *cct, int id, float weight) { | |
224ce89b WB |
936 | int r = validate_weightf(weight); |
937 | if (r < 0) { | |
938 | return r; | |
939 | } | |
7c673cae FG |
940 | return adjust_item_weight(cct, id, (int)(weight * (float)0x10000)); |
941 | } | |
942 | int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc); | |
943 | int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map<string,string>& loc) { | |
224ce89b WB |
944 | int r = validate_weightf(weight); |
945 | if (r < 0) { | |
946 | return r; | |
947 | } | |
7c673cae FG |
948 | return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc); |
949 | } | |
950 | void reweight(CephContext *cct); | |
f64942e4 AA |
951 | void reweight_bucket(crush_bucket *b, |
952 | crush_choose_arg_map& arg_map, | |
953 | vector<uint32_t> *weightv); | |
7c673cae FG |
954 | |
955 | int adjust_subtree_weight(CephContext *cct, int id, int weight); | |
956 | int adjust_subtree_weightf(CephContext *cct, int id, float weight) { | |
224ce89b WB |
957 | int r = validate_weightf(weight); |
958 | if (r < 0) { | |
959 | return r; | |
960 | } | |
7c673cae FG |
961 | return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000)); |
962 | } | |
963 | ||
964 | /// check if item id is present in the map hierarchy | |
965 | bool check_item_present(int id) const; | |
966 | ||
967 | ||
968 | /*** devices ***/ | |
969 | int get_max_devices() const { | |
970 | if (!crush) return 0; | |
971 | return crush->max_devices; | |
972 | } | |
973 | ||
974 | ||
975 | /*** rules ***/ | |
976 | private: | |
977 | crush_rule *get_rule(unsigned ruleno) const { | |
978 | if (!crush) return (crush_rule *)(-ENOENT); | |
979 | if (ruleno >= crush->max_rules) | |
980 | return 0; | |
981 | return crush->rules[ruleno]; | |
982 | } | |
983 | crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const { | |
984 | crush_rule *n = get_rule(ruleno); | |
985 | if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL); | |
986 | if (step >= n->len) return (crush_rule_step *)(-EINVAL); | |
987 | return &n->steps[step]; | |
988 | } | |
989 | ||
990 | public: | |
991 | /* accessors */ | |
992 | int get_max_rules() const { | |
993 | if (!crush) return 0; | |
994 | return crush->max_rules; | |
995 | } | |
996 | bool rule_exists(unsigned ruleno) const { | |
997 | if (!crush) return false; | |
998 | if (ruleno < crush->max_rules && | |
999 | crush->rules[ruleno] != NULL) | |
1000 | return true; | |
1001 | return false; | |
1002 | } | |
3efd9988 FG |
1003 | bool rule_has_take(unsigned ruleno, int take) const { |
1004 | if (!crush) return false; | |
1005 | crush_rule *rule = get_rule(ruleno); | |
1006 | for (unsigned i = 0; i < rule->len; ++i) { | |
1007 | if (rule->steps[i].op == CRUSH_RULE_TAKE && | |
1008 | rule->steps[i].arg1 == take) { | |
1009 | return true; | |
1010 | } | |
1011 | } | |
1012 | return false; | |
1013 | } | |
7c673cae FG |
1014 | int get_rule_len(unsigned ruleno) const { |
1015 | crush_rule *r = get_rule(ruleno); | |
1016 | if (IS_ERR(r)) return PTR_ERR(r); | |
1017 | return r->len; | |
1018 | } | |
1019 | int get_rule_mask_ruleset(unsigned ruleno) const { | |
1020 | crush_rule *r = get_rule(ruleno); | |
1021 | if (IS_ERR(r)) return -1; | |
1022 | return r->mask.ruleset; | |
1023 | } | |
1024 | int get_rule_mask_type(unsigned ruleno) const { | |
1025 | crush_rule *r = get_rule(ruleno); | |
1026 | if (IS_ERR(r)) return -1; | |
1027 | return r->mask.type; | |
1028 | } | |
1029 | int get_rule_mask_min_size(unsigned ruleno) const { | |
1030 | crush_rule *r = get_rule(ruleno); | |
1031 | if (IS_ERR(r)) return -1; | |
1032 | return r->mask.min_size; | |
1033 | } | |
1034 | int get_rule_mask_max_size(unsigned ruleno) const { | |
1035 | crush_rule *r = get_rule(ruleno); | |
1036 | if (IS_ERR(r)) return -1; | |
1037 | return r->mask.max_size; | |
1038 | } | |
1039 | int get_rule_op(unsigned ruleno, unsigned step) const { | |
1040 | crush_rule_step *s = get_rule_step(ruleno, step); | |
1041 | if (IS_ERR(s)) return PTR_ERR(s); | |
1042 | return s->op; | |
1043 | } | |
1044 | int get_rule_arg1(unsigned ruleno, unsigned step) const { | |
1045 | crush_rule_step *s = get_rule_step(ruleno, step); | |
1046 | if (IS_ERR(s)) return PTR_ERR(s); | |
1047 | return s->arg1; | |
1048 | } | |
1049 | int get_rule_arg2(unsigned ruleno, unsigned step) const { | |
1050 | crush_rule_step *s = get_rule_step(ruleno, step); | |
1051 | if (IS_ERR(s)) return PTR_ERR(s); | |
1052 | return s->arg2; | |
1053 | } | |
1054 | ||
3efd9988 FG |
1055 | private: |
1056 | float _get_take_weight_osd_map(int root, map<int,float> *pmap) const; | |
1057 | void _normalize_weight_map(float sum, const map<int,float>& m, | |
1058 | map<int,float> *pmap) const; | |
1059 | ||
1060 | public: | |
7c673cae FG |
1061 | /** |
1062 | * calculate a map of osds to weights for a given rule | |
1063 | * | |
1064 | * Generate a map of which OSDs get how much relative weight for a | |
1065 | * given rule. | |
1066 | * | |
1067 | * @param ruleno [in] rule id | |
1068 | * @param pmap [out] map of osd to weight | |
1069 | * @return 0 for success, or negative error code | |
1070 | */ | |
3efd9988 FG |
1071 | int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap) const; |
1072 | ||
1073 | /** | |
1074 | * calculate a map of osds to weights for a given starting root | |
1075 | * | |
1076 | * Generate a map of which OSDs get how much relative weight for a | |
1077 | * given starting root | |
1078 | * | |
1079 | * @param root node | |
1080 | * @param pmap [out] map of osd to weight | |
1081 | * @return 0 for success, or negative error code | |
1082 | */ | |
1083 | int get_take_weight_osd_map(int root, map<int,float> *pmap) const; | |
7c673cae FG |
1084 | |
1085 | /* modifiers */ | |
c07f9fc5 FG |
1086 | |
1087 | int add_rule(int ruleno, int len, int type, int minsize, int maxsize) { | |
7c673cae | 1088 | if (!crush) return -ENOENT; |
c07f9fc5 | 1089 | crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize); |
7c673cae FG |
1090 | assert(n); |
1091 | ruleno = crush_add_rule(crush, n, ruleno); | |
1092 | return ruleno; | |
1093 | } | |
1094 | int set_rule_mask_max_size(unsigned ruleno, int max_size) { | |
1095 | crush_rule *r = get_rule(ruleno); | |
1096 | if (IS_ERR(r)) return -1; | |
1097 | return r->mask.max_size = max_size; | |
1098 | } | |
1099 | int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) { | |
1100 | if (!crush) return -ENOENT; | |
1101 | crush_rule *n = get_rule(ruleno); | |
1102 | if (!n) return -1; | |
1103 | crush_rule_set_step(n, step, op, arg1, arg2); | |
1104 | return 0; | |
1105 | } | |
1106 | int set_rule_step_take(unsigned ruleno, unsigned step, int val) { | |
1107 | return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0); | |
1108 | } | |
1109 | int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) { | |
1110 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0); | |
1111 | } | |
1112 | int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) { | |
1113 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0); | |
1114 | } | |
1115 | int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) { | |
1116 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0); | |
1117 | } | |
1118 | int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) { | |
1119 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0); | |
1120 | } | |
1121 | int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) { | |
1122 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0); | |
1123 | } | |
1124 | int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) { | |
1125 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0); | |
1126 | } | |
1127 | int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) { | |
1128 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type); | |
1129 | } | |
1130 | int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) { | |
1131 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type); | |
1132 | } | |
1133 | int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) { | |
1134 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type); | |
1135 | } | |
1136 | int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) { | |
1137 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type); | |
1138 | } | |
1139 | int set_rule_step_emit(unsigned ruleno, unsigned step) { | |
1140 | return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0); | |
1141 | } | |
1142 | ||
31f18b77 FG |
1143 | int add_simple_rule( |
1144 | string name, string root_name, string failure_domain_type, | |
224ce89b | 1145 | string device_class, |
31f18b77 FG |
1146 | string mode, int rule_type, ostream *err = 0); |
1147 | ||
7c673cae | 1148 | /** |
31f18b77 | 1149 | * @param rno rule[set] id to use, -1 to pick the lowest available |
7c673cae | 1150 | */ |
31f18b77 FG |
1151 | int add_simple_rule_at( |
1152 | string name, string root_name, | |
224ce89b | 1153 | string failure_domain_type, string device_class, string mode, |
31f18b77 | 1154 | int rule_type, int rno, ostream *err = 0); |
7c673cae FG |
1155 | |
1156 | int remove_rule(int ruleno); | |
1157 | ||
1158 | ||
1159 | /** buckets **/ | |
7c673cae FG |
1160 | const crush_bucket *get_bucket(int id) const { |
1161 | if (!crush) | |
1162 | return (crush_bucket *)(-EINVAL); | |
1163 | unsigned int pos = (unsigned int)(-1 - id); | |
1164 | unsigned int max_buckets = crush->max_buckets; | |
1165 | if (pos >= max_buckets) | |
1166 | return (crush_bucket *)(-ENOENT); | |
1167 | crush_bucket *ret = crush->buckets[pos]; | |
1168 | if (ret == NULL) | |
1169 | return (crush_bucket *)(-ENOENT); | |
1170 | return ret; | |
1171 | } | |
c07f9fc5 | 1172 | private: |
7c673cae FG |
1173 | crush_bucket *get_bucket(int id) { |
1174 | if (!crush) | |
1175 | return (crush_bucket *)(-EINVAL); | |
1176 | unsigned int pos = (unsigned int)(-1 - id); | |
1177 | unsigned int max_buckets = crush->max_buckets; | |
1178 | if (pos >= max_buckets) | |
1179 | return (crush_bucket *)(-ENOENT); | |
1180 | crush_bucket *ret = crush->buckets[pos]; | |
1181 | if (ret == NULL) | |
1182 | return (crush_bucket *)(-ENOENT); | |
1183 | return ret; | |
1184 | } | |
1185 | /** | |
1186 | * detach a bucket from its parent and adjust the parent weight | |
1187 | * | |
1188 | * returns the weight of the detached bucket | |
1189 | **/ | |
c07f9fc5 | 1190 | int detach_bucket(CephContext *cct, int item); |
7c673cae | 1191 | |
f64942e4 AA |
1192 | int get_new_bucket_id(); |
1193 | ||
7c673cae FG |
1194 | public: |
1195 | int get_max_buckets() const { | |
1196 | if (!crush) return -EINVAL; | |
1197 | return crush->max_buckets; | |
1198 | } | |
1199 | int get_next_bucket_id() const { | |
1200 | if (!crush) return -EINVAL; | |
1201 | return crush_get_next_bucket_id(crush); | |
1202 | } | |
1203 | bool bucket_exists(int id) const { | |
1204 | const crush_bucket *b = get_bucket(id); | |
1205 | if (IS_ERR(b)) | |
1206 | return false; | |
1207 | return true; | |
1208 | } | |
1209 | int get_bucket_weight(int id) const { | |
1210 | const crush_bucket *b = get_bucket(id); | |
1211 | if (IS_ERR(b)) return PTR_ERR(b); | |
1212 | return b->weight; | |
1213 | } | |
1214 | float get_bucket_weightf(int id) const { | |
1215 | const crush_bucket *b = get_bucket(id); | |
1216 | if (IS_ERR(b)) return 0; | |
1217 | return b->weight / (float)0x10000; | |
1218 | } | |
1219 | int get_bucket_type(int id) const { | |
1220 | const crush_bucket *b = get_bucket(id); | |
1221 | if (IS_ERR(b)) return PTR_ERR(b); | |
1222 | return b->type; | |
1223 | } | |
1224 | int get_bucket_alg(int id) const { | |
1225 | const crush_bucket *b = get_bucket(id); | |
1226 | if (IS_ERR(b)) return PTR_ERR(b); | |
1227 | return b->alg; | |
1228 | } | |
1229 | int get_bucket_hash(int id) const { | |
1230 | const crush_bucket *b = get_bucket(id); | |
1231 | if (IS_ERR(b)) return PTR_ERR(b); | |
1232 | return b->hash; | |
1233 | } | |
1234 | int get_bucket_size(int id) const { | |
1235 | const crush_bucket *b = get_bucket(id); | |
1236 | if (IS_ERR(b)) return PTR_ERR(b); | |
1237 | return b->size; | |
1238 | } | |
1239 | int get_bucket_item(int id, int pos) const { | |
1240 | const crush_bucket *b = get_bucket(id); | |
1241 | if (IS_ERR(b)) return PTR_ERR(b); | |
1242 | if ((__u32)pos >= b->size) | |
1243 | return PTR_ERR(b); | |
1244 | return b->items[pos]; | |
1245 | } | |
1246 | int get_bucket_item_weight(int id, int pos) const { | |
1247 | const crush_bucket *b = get_bucket(id); | |
1248 | if (IS_ERR(b)) return PTR_ERR(b); | |
1249 | return crush_get_bucket_item_weight(b, pos); | |
1250 | } | |
1251 | float get_bucket_item_weightf(int id, int pos) const { | |
1252 | const crush_bucket *b = get_bucket(id); | |
1253 | if (IS_ERR(b)) return 0; | |
1254 | return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000; | |
1255 | } | |
1256 | ||
1257 | /* modifiers */ | |
1258 | int add_bucket(int bucketno, int alg, int hash, int type, int size, | |
c07f9fc5 | 1259 | int *items, int *weights, int *idout); |
31f18b77 FG |
1260 | int bucket_add_item(crush_bucket *bucket, int item, int weight); |
1261 | int bucket_remove_item(struct crush_bucket *bucket, int item); | |
1262 | int bucket_adjust_item_weight(CephContext *cct, struct crush_bucket *bucket, int item, int weight); | |
1263 | ||
7c673cae FG |
1264 | void finalize() { |
1265 | assert(crush); | |
1266 | crush_finalize(crush); | |
3a9019d9 FG |
1267 | if (!name_map.empty() && |
1268 | name_map.rbegin()->first >= crush->max_devices) { | |
1269 | crush->max_devices = name_map.rbegin()->first + 1; | |
1270 | } | |
3efd9988 | 1271 | have_uniform_rules = !has_legacy_rule_ids(); |
7c673cae | 1272 | } |
3efd9988 | 1273 | int bucket_set_alg(int id, int alg); |
7c673cae | 1274 | |
224ce89b | 1275 | int update_device_class(int id, const string& class_name, const string& name, ostream *ss); |
c07f9fc5 | 1276 | int remove_device_class(CephContext *cct, int id, ostream *ss); |
d2e6a577 FG |
1277 | int device_class_clone( |
1278 | int original, int device_class, | |
1279 | const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket, | |
1280 | const std::set<int32_t>& used_ids, | |
35e4c445 FG |
1281 | int *clone, |
1282 | map<int,map<int,vector<int>>> *cmap_item_weight); | |
1283 | int rename_class(const string& srcname, const string& dstname); | |
d2e6a577 FG |
1284 | int populate_classes( |
1285 | const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket); | |
b5b8bbf5 | 1286 | int get_rules_by_class(const string &class_name, set<int> *rules); |
3efd9988 | 1287 | int get_rules_by_osd(int osd, set<int> *rules); |
d2e6a577 FG |
1288 | bool _class_is_dead(int class_id); |
1289 | void cleanup_dead_classes(); | |
7c673cae FG |
1290 | int rebuild_roots_with_classes(); |
1291 | /* remove unused roots generated for class devices */ | |
35e4c445 | 1292 | int trim_roots_with_class(); |
7c673cae | 1293 | |
f64942e4 AA |
1294 | int reclassify( |
1295 | CephContext *cct, | |
1296 | ostream& out, | |
1297 | const map<string,string>& classify_root, | |
1298 | const map<string,pair<string,string>>& classify_bucket | |
1299 | ); | |
1300 | ||
1301 | int set_subtree_class(const string& name, const string& class_name); | |
1302 | ||
7c673cae FG |
1303 | void start_choose_profile() { |
1304 | free(crush->choose_tries); | |
1305 | /* | |
1306 | * the original choose_total_tries value was off by one (it | |
1307 | * counted "retries" and not "tries"). add one to alloc. | |
1308 | */ | |
c07f9fc5 FG |
1309 | crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries), |
1310 | (crush->choose_total_tries + 1)); | |
7c673cae FG |
1311 | memset(crush->choose_tries, 0, |
1312 | sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1)); | |
1313 | } | |
1314 | void stop_choose_profile() { | |
1315 | free(crush->choose_tries); | |
1316 | crush->choose_tries = 0; | |
1317 | } | |
1318 | ||
1319 | int get_choose_profile(__u32 **vec) { | |
1320 | if (crush->choose_tries) { | |
1321 | *vec = crush->choose_tries; | |
1322 | return crush->choose_total_tries; | |
1323 | } | |
1324 | return 0; | |
1325 | } | |
1326 | ||
1327 | ||
1328 | void set_max_devices(int m) { | |
1329 | crush->max_devices = m; | |
1330 | } | |
1331 | ||
1332 | int find_rule(int ruleset, int type, int size) const { | |
1333 | if (!crush) return -1; | |
181888fb FG |
1334 | if (have_uniform_rules && |
1335 | ruleset < (int)crush->max_rules && | |
1336 | crush->rules[ruleset] && | |
1337 | crush->rules[ruleset]->mask.type == type && | |
1338 | crush->rules[ruleset]->mask.min_size <= size && | |
1339 | crush->rules[ruleset]->mask.max_size >= size) { | |
1340 | return ruleset; | |
31f18b77 | 1341 | } |
181888fb | 1342 | return crush_find_rule(crush, ruleset, type, size); |
7c673cae FG |
1343 | } |
1344 | ||
d2e6a577 | 1345 | bool ruleset_exists(const int ruleset) const { |
7c673cae FG |
1346 | for (size_t i = 0; i < crush->max_rules; ++i) { |
1347 | if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) { | |
1348 | return true; | |
1349 | } | |
1350 | } | |
1351 | ||
1352 | return false; | |
1353 | } | |
1354 | ||
1355 | /** | |
1356 | * Return the lowest numbered ruleset of type `type` | |
1357 | * | |
3efd9988 | 1358 | * @returns a ruleset ID, or -1 if no matching rules found. |
7c673cae FG |
1359 | */ |
1360 | int find_first_ruleset(int type) const { | |
1361 | int result = -1; | |
1362 | ||
1363 | for (size_t i = 0; i < crush->max_rules; ++i) { | |
1364 | if (crush->rules[i] | |
1365 | && crush->rules[i]->mask.type == type | |
1366 | && (crush->rules[i]->mask.ruleset < result || result == -1)) { | |
1367 | result = crush->rules[i]->mask.ruleset; | |
1368 | } | |
1369 | } | |
1370 | ||
1371 | return result; | |
1372 | } | |
1373 | ||
c07f9fc5 FG |
1374 | bool have_choose_args(int64_t choose_args_index) const { |
1375 | return choose_args.count(choose_args_index); | |
1376 | } | |
1377 | ||
1378 | crush_choose_arg_map choose_args_get_with_fallback( | |
1379 | int64_t choose_args_index) const { | |
1380 | auto i = choose_args.find(choose_args_index); | |
1381 | if (i == choose_args.end()) { | |
1382 | i = choose_args.find(DEFAULT_CHOOSE_ARGS); | |
1383 | } | |
1384 | if (i == choose_args.end()) { | |
1385 | crush_choose_arg_map arg_map; | |
1386 | arg_map.args = NULL; | |
1387 | arg_map.size = 0; | |
1388 | return arg_map; | |
1389 | } else { | |
1390 | return i->second; | |
1391 | } | |
1392 | } | |
1393 | crush_choose_arg_map choose_args_get(int64_t choose_args_index) const { | |
7c673cae FG |
1394 | auto i = choose_args.find(choose_args_index); |
1395 | if (i == choose_args.end()) { | |
1396 | crush_choose_arg_map arg_map; | |
1397 | arg_map.args = NULL; | |
1398 | arg_map.size = 0; | |
1399 | return arg_map; | |
1400 | } else { | |
1401 | return i->second; | |
1402 | } | |
1403 | } | |
1404 | ||
1405 | void destroy_choose_args(crush_choose_arg_map arg_map) { | |
1406 | for (__u32 i = 0; i < arg_map.size; i++) { | |
1407 | crush_choose_arg *arg = &arg_map.args[i]; | |
28e407b8 | 1408 | for (__u32 j = 0; j < arg->weight_set_positions; j++) { |
7c673cae FG |
1409 | crush_weight_set *weight_set = &arg->weight_set[j]; |
1410 | free(weight_set->weights); | |
1411 | } | |
1412 | if (arg->weight_set) | |
1413 | free(arg->weight_set); | |
1414 | if (arg->ids) | |
1415 | free(arg->ids); | |
1416 | } | |
1417 | free(arg_map.args); | |
1418 | } | |
c07f9fc5 FG |
1419 | |
1420 | void create_choose_args(int64_t id, int positions) { | |
1421 | if (choose_args.count(id)) | |
1422 | return; | |
1423 | assert(positions); | |
1424 | auto &cmap = choose_args[id]; | |
1425 | cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg), | |
1426 | crush->max_buckets); | |
1427 | cmap.size = crush->max_buckets; | |
1428 | for (int bidx=0; bidx < crush->max_buckets; ++bidx) { | |
1429 | crush_bucket *b = crush->buckets[bidx]; | |
1430 | auto &carg = cmap.args[bidx]; | |
1431 | carg.ids = NULL; | |
1432 | carg.ids_size = 0; | |
1433 | if (b && b->alg == CRUSH_BUCKET_STRAW2) { | |
1434 | crush_bucket_straw2 *sb = (crush_bucket_straw2*)b; | |
28e407b8 | 1435 | carg.weight_set_positions = positions; |
c07f9fc5 | 1436 | carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set), |
28e407b8 | 1437 | carg.weight_set_positions); |
c07f9fc5 FG |
1438 | // initialize with canonical weights |
1439 | for (int pos = 0; pos < positions; ++pos) { | |
1440 | carg.weight_set[pos].size = b->size; | |
1441 | carg.weight_set[pos].weights = (__u32*)calloc(4, b->size); | |
1442 | for (unsigned i = 0; i < b->size; ++i) { | |
1443 | carg.weight_set[pos].weights[i] = sb->item_weights[i]; | |
1444 | } | |
1445 | } | |
1446 | } else { | |
1447 | carg.weight_set = NULL; | |
28e407b8 | 1448 | carg.weight_set_positions = 0; |
c07f9fc5 FG |
1449 | } |
1450 | } | |
1451 | } | |
1452 | ||
1453 | void rm_choose_args(int64_t id) { | |
1454 | auto p = choose_args.find(id); | |
1455 | if (p != choose_args.end()) { | |
1456 | destroy_choose_args(p->second); | |
1457 | choose_args.erase(p); | |
1458 | } | |
1459 | } | |
1460 | ||
7c673cae FG |
1461 | void choose_args_clear() { |
1462 | for (auto w : choose_args) | |
1463 | destroy_choose_args(w.second); | |
1464 | choose_args.clear(); | |
1465 | } | |
1466 | ||
28e407b8 AA |
1467 | // remove choose_args for buckets that no longer exist, create them for new buckets |
1468 | void update_choose_args(CephContext *cct); | |
1469 | ||
c07f9fc5 FG |
1470 | // adjust choose_args_map weight, preserving the hierarchical summation |
1471 | // property. used by callers optimizing layouts by tweaking weights. | |
1472 | int _choose_args_adjust_item_weight_in_bucket( | |
1473 | CephContext *cct, | |
1474 | crush_choose_arg_map cmap, | |
1475 | int bucketid, | |
1476 | int id, | |
1477 | const vector<int>& weight, | |
1478 | ostream *ss); | |
1479 | int choose_args_adjust_item_weight( | |
1480 | CephContext *cct, | |
1481 | crush_choose_arg_map cmap, | |
1482 | int id, const vector<int>& weight, | |
1483 | ostream *ss); | |
1484 | int choose_args_adjust_item_weightf( | |
1485 | CephContext *cct, | |
1486 | crush_choose_arg_map cmap, | |
1487 | int id, const vector<double>& weightf, | |
1488 | ostream *ss) { | |
1489 | vector<int> weight(weightf.size()); | |
1490 | for (unsigned i = 0; i < weightf.size(); ++i) { | |
1491 | weight[i] = (int)(weightf[i] * (float)0x10000); | |
1492 | } | |
1493 | return choose_args_adjust_item_weight(cct, cmap, id, weight, ss); | |
1494 | } | |
1495 | ||
1496 | int get_choose_args_positions(crush_choose_arg_map cmap) { | |
1497 | // infer positions from other buckets | |
1498 | for (unsigned j = 0; j < cmap.size; ++j) { | |
28e407b8 AA |
1499 | if (cmap.args[j].weight_set_positions) { |
1500 | return cmap.args[j].weight_set_positions; | |
c07f9fc5 FG |
1501 | } |
1502 | } | |
1503 | return 1; | |
1504 | } | |
1505 | ||
7c673cae FG |
1506 | template<typename WeightVector> |
1507 | void do_rule(int rule, int x, vector<int>& out, int maxout, | |
1508 | const WeightVector& weight, | |
1509 | uint64_t choose_args_index) const { | |
1510 | int rawout[maxout]; | |
1511 | char work[crush_work_size(crush, maxout)]; | |
1512 | crush_init_workspace(crush, work); | |
c07f9fc5 FG |
1513 | crush_choose_arg_map arg_map = choose_args_get_with_fallback( |
1514 | choose_args_index); | |
7c673cae FG |
1515 | int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0], |
1516 | weight.size(), work, arg_map.args); | |
1517 | if (numrep < 0) | |
1518 | numrep = 0; | |
1519 | out.resize(numrep); | |
1520 | for (int i=0; i<numrep; i++) | |
1521 | out[i] = rawout[i]; | |
1522 | } | |
1523 | ||
1524 | int _choose_type_stack( | |
1525 | CephContext *cct, | |
1526 | const vector<pair<int,int>>& stack, | |
1527 | const set<int>& overfull, | |
1528 | const vector<int>& underfull, | |
1529 | const vector<int>& orig, | |
1530 | vector<int>::const_iterator& i, | |
1531 | set<int>& used, | |
1532 | vector<int> *pw) const; | |
1533 | ||
1534 | int try_remap_rule( | |
1535 | CephContext *cct, | |
1536 | int rule, | |
1537 | int maxout, | |
1538 | const set<int>& overfull, | |
1539 | const vector<int>& underfull, | |
1540 | const vector<int>& orig, | |
1541 | vector<int> *out) const; | |
1542 | ||
1543 | bool check_crush_rule(int ruleset, int type, int size, ostream& ss) { | |
1544 | assert(crush); | |
1545 | ||
1546 | __u32 i; | |
1547 | for (i = 0; i < crush->max_rules; i++) { | |
1548 | if (crush->rules[i] && | |
1549 | crush->rules[i]->mask.ruleset == ruleset && | |
1550 | crush->rules[i]->mask.type == type) { | |
1551 | ||
1552 | if (crush->rules[i]->mask.min_size <= size && | |
1553 | crush->rules[i]->mask.max_size >= size) { | |
1554 | return true; | |
1555 | } else if (size < crush->rules[i]->mask.min_size) { | |
1556 | ss << "pool size is smaller than the crush rule min size"; | |
1557 | return false; | |
1558 | } else { | |
1559 | ss << "pool size is bigger than the crush rule max size"; | |
1560 | return false; | |
1561 | } | |
1562 | } | |
1563 | } | |
1564 | ||
1565 | return false; | |
1566 | } | |
1567 | ||
1568 | void encode(bufferlist &bl, uint64_t features) const; | |
1569 | void decode(bufferlist::iterator &blp); | |
1570 | void decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp); | |
1571 | void dump(Formatter *f) const; | |
1572 | void dump_rules(Formatter *f) const; | |
1573 | void dump_rule(int ruleset, Formatter *f) const; | |
1574 | void dump_tunables(Formatter *f) const; | |
1575 | void dump_choose_args(Formatter *f) const; | |
1576 | void list_rules(Formatter *f) const; | |
c07f9fc5 FG |
1577 | void list_rules(ostream *ss) const; |
1578 | void dump_tree(ostream *out, | |
1579 | Formatter *f, | |
1580 | const CrushTreeDumper::name_map_t& ws, | |
1581 | bool show_shadow = false) const; | |
1582 | void dump_tree(ostream *out, Formatter *f) { | |
1583 | dump_tree(out, f, CrushTreeDumper::name_map_t()); | |
1584 | } | |
1585 | void dump_tree(Formatter *f, | |
1586 | const CrushTreeDumper::name_map_t& ws) const; | |
7c673cae FG |
1587 | static void generate_test_instances(list<CrushWrapper*>& o); |
1588 | ||
7c673cae FG |
1589 | int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct); |
1590 | ||
1591 | static bool is_valid_crush_name(const string& s); | |
1592 | static bool is_valid_crush_loc(CephContext *cct, | |
1593 | const map<string,string>& loc); | |
1594 | }; | |
1595 | WRITE_CLASS_ENCODER_FEATURES(CrushWrapper) | |
1596 | ||
1597 | #endif |