]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #ifndef CEPH_CRUSH_WRAPPER_H | |
5 | #define CEPH_CRUSH_WRAPPER_H | |
6 | ||
7 | #include <stdlib.h> | |
8 | #include <map> | |
9 | #include <set> | |
10 | #include <string> | |
11 | ||
12 | #include <iosfwd> | |
13 | ||
14 | #include "include/types.h" | |
15 | ||
16 | extern "C" { | |
17 | #include "crush.h" | |
18 | #include "hash.h" | |
19 | #include "mapper.h" | |
20 | #include "builder.h" | |
21 | } | |
22 | ||
31f18b77 | 23 | #include "include/assert.h" |
7c673cae FG |
24 | #include "include/err.h" |
25 | #include "include/encoding.h" | |
c07f9fc5 | 26 | #include "include/mempool.h" |
7c673cae FG |
27 | |
28 | #include "common/Mutex.h" | |
29 | ||
7c673cae FG |
30 | #define BUG_ON(x) assert(!(x)) |
31 | ||
32 | namespace ceph { | |
33 | class Formatter; | |
34 | } | |
35 | ||
c07f9fc5 FG |
36 | namespace CrushTreeDumper { |
37 | typedef mempool::osdmap::map<int64_t,string> name_map_t; | |
38 | } | |
39 | ||
7c673cae FG |
40 | WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's |
41 | ||
42 | inline static void encode(const crush_rule_step &s, bufferlist &bl) | |
43 | { | |
44 | ::encode(s.op, bl); | |
45 | ::encode(s.arg1, bl); | |
46 | ::encode(s.arg2, bl); | |
47 | } | |
48 | inline static void decode(crush_rule_step &s, bufferlist::iterator &p) | |
49 | { | |
50 | ::decode(s.op, p); | |
51 | ::decode(s.arg1, p); | |
52 | ::decode(s.arg2, p); | |
53 | } | |
54 | ||
55 | using namespace std; | |
56 | class CrushWrapper { | |
57 | public: | |
c07f9fc5 FG |
58 | // magic value used by OSDMap for a "default" fallback choose_args, used if |
59 | // the choose_arg_map passed to do_rule does not exist. if this also | |
60 | // doesn't exist, fall back to canonical weights. | |
61 | enum { | |
62 | DEFAULT_CHOOSE_ARGS = -1 | |
63 | }; | |
64 | ||
7c673cae FG |
65 | std::map<int32_t, string> type_map; /* bucket/device type names */ |
66 | std::map<int32_t, string> name_map; /* bucket/device names */ | |
67 | std::map<int32_t, string> rule_name_map; | |
d2e6a577 | 68 | |
7c673cae FG |
69 | std::map<int32_t, int32_t> class_map; /* item id -> class id */ |
70 | std::map<int32_t, string> class_name; /* class id -> class name */ | |
71 | std::map<string, int32_t> class_rname; /* class name -> class id */ | |
72 | std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */ | |
c07f9fc5 | 73 | std::map<int64_t, crush_choose_arg_map> choose_args; |
7c673cae FG |
74 | |
75 | private: | |
76 | struct crush_map *crush; | |
31f18b77 FG |
77 | |
78 | bool have_uniform_rules = false; | |
79 | ||
7c673cae FG |
80 | /* reverse maps */ |
81 | mutable bool have_rmaps; | |
82 | mutable std::map<string, int> type_rmap, name_rmap, rule_name_rmap; | |
83 | void build_rmaps() const { | |
84 | if (have_rmaps) return; | |
85 | build_rmap(type_map, type_rmap); | |
86 | build_rmap(name_map, name_rmap); | |
87 | build_rmap(rule_name_map, rule_name_rmap); | |
88 | have_rmaps = true; | |
89 | } | |
90 | void build_rmap(const map<int, string> &f, std::map<string, int> &r) const { | |
91 | r.clear(); | |
92 | for (std::map<int, string>::const_iterator p = f.begin(); p != f.end(); ++p) | |
93 | r[p->second] = p->first; | |
94 | } | |
95 | ||
96 | public: | |
97 | CrushWrapper(const CrushWrapper& other); | |
98 | const CrushWrapper& operator=(const CrushWrapper& other); | |
99 | ||
100 | CrushWrapper() : crush(0), have_rmaps(false) { | |
101 | create(); | |
102 | } | |
103 | ~CrushWrapper() { | |
104 | if (crush) | |
105 | crush_destroy(crush); | |
106 | choose_args_clear(); | |
107 | } | |
108 | ||
109 | crush_map *get_crush_map() { return crush; } | |
110 | ||
111 | /* building */ | |
112 | void create() { | |
113 | if (crush) | |
114 | crush_destroy(crush); | |
115 | crush = crush_create(); | |
116 | choose_args_clear(); | |
117 | assert(crush); | |
118 | have_rmaps = false; | |
119 | ||
120 | set_tunables_default(); | |
121 | } | |
122 | ||
3efd9988 FG |
123 | /** |
124 | * true if any rule has a rule id != its position in the array | |
125 | * | |
126 | * These indicate "ruleset" IDs that were created by older versions | |
127 | * of Ceph. They are cleaned up in renumber_rules so that eventually | |
128 | * we can remove the code for handling them. | |
129 | */ | |
130 | bool has_legacy_rule_ids() const; | |
31f18b77 | 131 | |
3efd9988 FG |
132 | /** |
133 | * fix rules whose ruleid != ruleset | |
134 | * | |
135 | * These rules were created in older versions of Ceph. The concept | |
136 | * of a ruleset no longer exists. | |
137 | * | |
138 | * Return a map of old ID -> new ID. Caller must update OSDMap | |
139 | * to use new IDs. | |
140 | */ | |
141 | std::map<int, int> renumber_rules(); | |
31f18b77 | 142 | |
c07f9fc5 FG |
143 | /// true if any buckets that aren't straw2 |
144 | bool has_non_straw2_buckets() const; | |
145 | ||
7c673cae FG |
146 | // tunables |
147 | void set_tunables_argonaut() { | |
148 | crush->choose_local_tries = 2; | |
149 | crush->choose_local_fallback_tries = 5; | |
150 | crush->choose_total_tries = 19; | |
151 | crush->chooseleaf_descend_once = 0; | |
152 | crush->chooseleaf_vary_r = 0; | |
153 | crush->chooseleaf_stable = 0; | |
154 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
155 | } | |
156 | void set_tunables_bobtail() { | |
157 | crush->choose_local_tries = 0; | |
158 | crush->choose_local_fallback_tries = 0; | |
159 | crush->choose_total_tries = 50; | |
160 | crush->chooseleaf_descend_once = 1; | |
161 | crush->chooseleaf_vary_r = 0; | |
162 | crush->chooseleaf_stable = 0; | |
163 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
164 | } | |
165 | void set_tunables_firefly() { | |
166 | crush->choose_local_tries = 0; | |
167 | crush->choose_local_fallback_tries = 0; | |
168 | crush->choose_total_tries = 50; | |
169 | crush->chooseleaf_descend_once = 1; | |
170 | crush->chooseleaf_vary_r = 1; | |
171 | crush->chooseleaf_stable = 0; | |
172 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
173 | } | |
174 | void set_tunables_hammer() { | |
175 | crush->choose_local_tries = 0; | |
176 | crush->choose_local_fallback_tries = 0; | |
177 | crush->choose_total_tries = 50; | |
178 | crush->chooseleaf_descend_once = 1; | |
179 | crush->chooseleaf_vary_r = 1; | |
180 | crush->chooseleaf_stable = 0; | |
181 | crush->allowed_bucket_algs = | |
182 | (1 << CRUSH_BUCKET_UNIFORM) | | |
183 | (1 << CRUSH_BUCKET_LIST) | | |
184 | (1 << CRUSH_BUCKET_STRAW) | | |
185 | (1 << CRUSH_BUCKET_STRAW2); | |
186 | } | |
187 | void set_tunables_jewel() { | |
188 | crush->choose_local_tries = 0; | |
189 | crush->choose_local_fallback_tries = 0; | |
190 | crush->choose_total_tries = 50; | |
191 | crush->chooseleaf_descend_once = 1; | |
192 | crush->chooseleaf_vary_r = 1; | |
193 | crush->chooseleaf_stable = 1; | |
194 | crush->allowed_bucket_algs = | |
195 | (1 << CRUSH_BUCKET_UNIFORM) | | |
196 | (1 << CRUSH_BUCKET_LIST) | | |
197 | (1 << CRUSH_BUCKET_STRAW) | | |
198 | (1 << CRUSH_BUCKET_STRAW2); | |
199 | } | |
200 | ||
201 | void set_tunables_legacy() { | |
202 | set_tunables_argonaut(); | |
203 | crush->straw_calc_version = 0; | |
204 | } | |
205 | void set_tunables_optimal() { | |
206 | set_tunables_jewel(); | |
207 | crush->straw_calc_version = 1; | |
208 | } | |
209 | void set_tunables_default() { | |
31f18b77 | 210 | set_tunables_jewel(); |
7c673cae FG |
211 | crush->straw_calc_version = 1; |
212 | } | |
213 | ||
214 | int get_choose_local_tries() const { | |
215 | return crush->choose_local_tries; | |
216 | } | |
217 | void set_choose_local_tries(int n) { | |
218 | crush->choose_local_tries = n; | |
219 | } | |
220 | ||
221 | int get_choose_local_fallback_tries() const { | |
222 | return crush->choose_local_fallback_tries; | |
223 | } | |
224 | void set_choose_local_fallback_tries(int n) { | |
225 | crush->choose_local_fallback_tries = n; | |
226 | } | |
227 | ||
228 | int get_choose_total_tries() const { | |
229 | return crush->choose_total_tries; | |
230 | } | |
231 | void set_choose_total_tries(int n) { | |
232 | crush->choose_total_tries = n; | |
233 | } | |
234 | ||
235 | int get_chooseleaf_descend_once() const { | |
236 | return crush->chooseleaf_descend_once; | |
237 | } | |
238 | void set_chooseleaf_descend_once(int n) { | |
239 | crush->chooseleaf_descend_once = !!n; | |
240 | } | |
241 | ||
242 | int get_chooseleaf_vary_r() const { | |
243 | return crush->chooseleaf_vary_r; | |
244 | } | |
245 | void set_chooseleaf_vary_r(int n) { | |
246 | crush->chooseleaf_vary_r = n; | |
247 | } | |
248 | ||
249 | int get_chooseleaf_stable() const { | |
250 | return crush->chooseleaf_stable; | |
251 | } | |
252 | void set_chooseleaf_stable(int n) { | |
253 | crush->chooseleaf_stable = n; | |
254 | } | |
255 | ||
256 | int get_straw_calc_version() const { | |
257 | return crush->straw_calc_version; | |
258 | } | |
259 | void set_straw_calc_version(int n) { | |
260 | crush->straw_calc_version = n; | |
261 | } | |
262 | ||
263 | unsigned get_allowed_bucket_algs() const { | |
264 | return crush->allowed_bucket_algs; | |
265 | } | |
266 | void set_allowed_bucket_algs(unsigned n) { | |
267 | crush->allowed_bucket_algs = n; | |
268 | } | |
269 | ||
270 | bool has_argonaut_tunables() const { | |
271 | return | |
272 | crush->choose_local_tries == 2 && | |
273 | crush->choose_local_fallback_tries == 5 && | |
274 | crush->choose_total_tries == 19 && | |
275 | crush->chooseleaf_descend_once == 0 && | |
276 | crush->chooseleaf_vary_r == 0 && | |
277 | crush->chooseleaf_stable == 0 && | |
278 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
279 | } | |
280 | bool has_bobtail_tunables() const { | |
281 | return | |
282 | crush->choose_local_tries == 0 && | |
283 | crush->choose_local_fallback_tries == 0 && | |
284 | crush->choose_total_tries == 50 && | |
285 | crush->chooseleaf_descend_once == 1 && | |
286 | crush->chooseleaf_vary_r == 0 && | |
287 | crush->chooseleaf_stable == 0 && | |
288 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
289 | } | |
290 | bool has_firefly_tunables() const { | |
291 | return | |
292 | crush->choose_local_tries == 0 && | |
293 | crush->choose_local_fallback_tries == 0 && | |
294 | crush->choose_total_tries == 50 && | |
295 | crush->chooseleaf_descend_once == 1 && | |
296 | crush->chooseleaf_vary_r == 1 && | |
297 | crush->chooseleaf_stable == 0 && | |
298 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
299 | } | |
300 | bool has_hammer_tunables() const { | |
301 | return | |
302 | crush->choose_local_tries == 0 && | |
303 | crush->choose_local_fallback_tries == 0 && | |
304 | crush->choose_total_tries == 50 && | |
305 | crush->chooseleaf_descend_once == 1 && | |
306 | crush->chooseleaf_vary_r == 1 && | |
307 | crush->chooseleaf_stable == 0 && | |
308 | crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) | | |
309 | (1 << CRUSH_BUCKET_LIST) | | |
310 | (1 << CRUSH_BUCKET_STRAW) | | |
311 | (1 << CRUSH_BUCKET_STRAW2)); | |
312 | } | |
313 | bool has_jewel_tunables() const { | |
314 | return | |
315 | crush->choose_local_tries == 0 && | |
316 | crush->choose_local_fallback_tries == 0 && | |
317 | crush->choose_total_tries == 50 && | |
318 | crush->chooseleaf_descend_once == 1 && | |
319 | crush->chooseleaf_vary_r == 1 && | |
320 | crush->chooseleaf_stable == 1 && | |
321 | crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) | | |
322 | (1 << CRUSH_BUCKET_LIST) | | |
323 | (1 << CRUSH_BUCKET_STRAW) | | |
324 | (1 << CRUSH_BUCKET_STRAW2)); | |
325 | } | |
326 | ||
327 | bool has_optimal_tunables() const { | |
328 | return has_jewel_tunables(); | |
329 | } | |
330 | bool has_legacy_tunables() const { | |
331 | return has_argonaut_tunables(); | |
332 | } | |
333 | ||
334 | bool has_nondefault_tunables() const { | |
335 | return | |
336 | (crush->choose_local_tries != 2 || | |
337 | crush->choose_local_fallback_tries != 5 || | |
338 | crush->choose_total_tries != 19); | |
339 | } | |
340 | bool has_nondefault_tunables2() const { | |
341 | return | |
342 | crush->chooseleaf_descend_once != 0; | |
343 | } | |
344 | bool has_nondefault_tunables3() const { | |
345 | return | |
346 | crush->chooseleaf_vary_r != 0; | |
347 | } | |
348 | bool has_nondefault_tunables5() const { | |
349 | return | |
350 | crush->chooseleaf_stable != 0; | |
351 | } | |
352 | ||
353 | bool has_v2_rules() const; | |
354 | bool has_v3_rules() const; | |
355 | bool has_v4_buckets() const; | |
356 | bool has_v5_rules() const; | |
31f18b77 FG |
357 | bool has_choose_args() const; // any choose_args |
358 | bool has_incompat_choose_args() const; // choose_args that can't be made compat | |
7c673cae FG |
359 | |
360 | bool is_v2_rule(unsigned ruleid) const; | |
361 | bool is_v3_rule(unsigned ruleid) const; | |
362 | bool is_v5_rule(unsigned ruleid) const; | |
363 | ||
364 | string get_min_required_version() const { | |
365 | if (has_v5_rules() || has_nondefault_tunables5()) | |
366 | return "jewel"; | |
367 | else if (has_v4_buckets()) | |
368 | return "hammer"; | |
369 | else if (has_nondefault_tunables3()) | |
370 | return "firefly"; | |
371 | else if (has_nondefault_tunables2() || has_nondefault_tunables()) | |
372 | return "bobtail"; | |
373 | else | |
374 | return "argonaut"; | |
375 | } | |
376 | ||
377 | // default bucket types | |
378 | unsigned get_default_bucket_alg() const { | |
379 | // in order of preference | |
380 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2)) | |
381 | return CRUSH_BUCKET_STRAW2; | |
382 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW)) | |
383 | return CRUSH_BUCKET_STRAW; | |
384 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE)) | |
385 | return CRUSH_BUCKET_TREE; | |
386 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST)) | |
387 | return CRUSH_BUCKET_LIST; | |
388 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM)) | |
389 | return CRUSH_BUCKET_UNIFORM; | |
390 | return 0; | |
391 | } | |
392 | ||
393 | // bucket types | |
394 | int get_num_type_names() const { | |
395 | return type_map.size(); | |
396 | } | |
31f18b77 FG |
397 | int get_max_type_id() const { |
398 | if (type_map.empty()) | |
399 | return 0; | |
400 | return type_map.rbegin()->first; | |
401 | } | |
7c673cae FG |
402 | int get_type_id(const string& name) const { |
403 | build_rmaps(); | |
404 | if (type_rmap.count(name)) | |
405 | return type_rmap[name]; | |
406 | return -1; | |
407 | } | |
408 | const char *get_type_name(int t) const { | |
409 | std::map<int,string>::const_iterator p = type_map.find(t); | |
410 | if (p != type_map.end()) | |
411 | return p->second.c_str(); | |
412 | return 0; | |
413 | } | |
414 | void set_type_name(int i, const string& name) { | |
415 | type_map[i] = name; | |
416 | if (have_rmaps) | |
417 | type_rmap[name] = i; | |
418 | } | |
419 | ||
420 | // item/bucket names | |
421 | bool name_exists(const string& name) const { | |
422 | build_rmaps(); | |
423 | return name_rmap.count(name); | |
424 | } | |
425 | bool item_exists(int i) const { | |
426 | return name_map.count(i); | |
427 | } | |
428 | int get_item_id(const string& name) const { | |
429 | build_rmaps(); | |
430 | if (name_rmap.count(name)) | |
431 | return name_rmap[name]; | |
432 | return 0; /* hrm */ | |
433 | } | |
434 | const char *get_item_name(int t) const { | |
435 | std::map<int,string>::const_iterator p = name_map.find(t); | |
436 | if (p != name_map.end()) | |
437 | return p->second.c_str(); | |
438 | return 0; | |
439 | } | |
440 | int set_item_name(int i, const string& name) { | |
441 | if (!is_valid_crush_name(name)) | |
442 | return -EINVAL; | |
443 | name_map[i] = name; | |
444 | if (have_rmaps) | |
445 | name_rmap[name] = i; | |
446 | return 0; | |
447 | } | |
31f18b77 FG |
448 | void swap_names(int a, int b) { |
449 | string an = name_map[a]; | |
450 | string bn = name_map[b]; | |
451 | name_map[a] = bn; | |
452 | name_map[b] = an; | |
453 | if (have_rmaps) { | |
454 | name_rmap[an] = b; | |
455 | name_rmap[bn] = a; | |
456 | } | |
457 | } | |
7c673cae FG |
458 | int split_id_class(int i, int *idout, int *classout) const; |
459 | ||
460 | bool class_exists(const string& name) const { | |
461 | return class_rname.count(name); | |
462 | } | |
463 | const char *get_class_name(int i) const { | |
224ce89b | 464 | auto p = class_name.find(i); |
7c673cae FG |
465 | if (p != class_name.end()) |
466 | return p->second.c_str(); | |
467 | return 0; | |
468 | } | |
469 | int get_class_id(const string& name) const { | |
224ce89b | 470 | auto p = class_rname.find(name); |
7c673cae FG |
471 | if (p != class_rname.end()) |
472 | return p->second; | |
473 | else | |
474 | return -EINVAL; | |
475 | } | |
476 | int remove_class_name(const string& name) { | |
224ce89b | 477 | auto p = class_rname.find(name); |
7c673cae FG |
478 | if (p == class_rname.end()) |
479 | return -ENOENT; | |
480 | int class_id = p->second; | |
224ce89b | 481 | auto q = class_name.find(class_id); |
7c673cae FG |
482 | if (q == class_name.end()) |
483 | return -ENOENT; | |
484 | class_rname.erase(name); | |
485 | class_name.erase(class_id); | |
486 | return 0; | |
487 | } | |
224ce89b | 488 | |
224ce89b WB |
489 | int32_t _alloc_class_id() const; |
490 | ||
7c673cae FG |
491 | int get_or_create_class_id(const string& name) { |
492 | int c = get_class_id(name); | |
493 | if (c < 0) { | |
224ce89b | 494 | int i = _alloc_class_id(); |
7c673cae FG |
495 | class_name[i] = name; |
496 | class_rname[name] = i; | |
497 | return i; | |
498 | } else { | |
499 | return c; | |
500 | } | |
501 | } | |
502 | ||
503 | const char *get_item_class(int t) const { | |
504 | std::map<int,int>::const_iterator p = class_map.find(t); | |
505 | if (p == class_map.end()) | |
506 | return 0; | |
507 | return get_class_name(p->second); | |
508 | } | |
509 | int set_item_class(int i, const string& name) { | |
510 | if (!is_valid_crush_name(name)) | |
511 | return -EINVAL; | |
512 | class_map[i] = get_or_create_class_id(name); | |
513 | return 0; | |
514 | } | |
515 | int set_item_class(int i, int c) { | |
516 | class_map[i] = c; | |
517 | return c; | |
518 | } | |
224ce89b WB |
519 | void get_devices_by_class(const string &name, set<int> *devices) const { |
520 | assert(devices); | |
521 | devices->clear(); | |
522 | if (!class_exists(name)) { | |
523 | return; | |
524 | } | |
525 | auto cid = get_class_id(name); | |
526 | for (auto& p : class_map) { | |
527 | if (p.first >= 0 && p.second == cid) { | |
528 | devices->insert(p.first); | |
529 | } | |
530 | } | |
531 | } | |
532 | void class_remove_item(int i) { | |
533 | auto it = class_map.find(i); | |
534 | if (it == class_map.end()) { | |
535 | return; | |
536 | } | |
537 | class_map.erase(it); | |
538 | } | |
7c673cae FG |
539 | int can_rename_item(const string& srcname, |
540 | const string& dstname, | |
541 | ostream *ss) const; | |
542 | int rename_item(const string& srcname, | |
543 | const string& dstname, | |
544 | ostream *ss); | |
545 | int can_rename_bucket(const string& srcname, | |
546 | const string& dstname, | |
547 | ostream *ss) const; | |
548 | int rename_bucket(const string& srcname, | |
549 | const string& dstname, | |
550 | ostream *ss); | |
551 | ||
552 | // rule names | |
b5b8bbf5 FG |
553 | int rename_rule(const string& srcname, |
554 | const string& dstname, | |
555 | ostream *ss); | |
7c673cae FG |
556 | bool rule_exists(string name) const { |
557 | build_rmaps(); | |
558 | return rule_name_rmap.count(name); | |
559 | } | |
560 | int get_rule_id(string name) const { | |
561 | build_rmaps(); | |
562 | if (rule_name_rmap.count(name)) | |
563 | return rule_name_rmap[name]; | |
564 | return -ENOENT; | |
565 | } | |
566 | const char *get_rule_name(int t) const { | |
567 | std::map<int,string>::const_iterator p = rule_name_map.find(t); | |
568 | if (p != rule_name_map.end()) | |
569 | return p->second.c_str(); | |
570 | return 0; | |
571 | } | |
572 | void set_rule_name(int i, const string& name) { | |
573 | rule_name_map[i] = name; | |
574 | if (have_rmaps) | |
575 | rule_name_rmap[name] = i; | |
576 | } | |
c07f9fc5 FG |
577 | bool is_shadow_item(int id) const { |
578 | const char *name = get_item_name(id); | |
579 | return name && !is_valid_crush_name(name); | |
580 | } | |
7c673cae FG |
581 | |
582 | ||
583 | /** | |
584 | * find tree nodes referenced by rules by a 'take' command | |
585 | * | |
586 | * Note that these may not be parentless roots. | |
587 | */ | |
3efd9988 | 588 | void find_takes(set<int> *roots) const; |
7c673cae FG |
589 | |
590 | /** | |
591 | * find tree roots | |
592 | * | |
593 | * These are parentless nodes in the map. | |
594 | */ | |
3efd9988 | 595 | void find_roots(set<int> *roots) const; |
7c673cae | 596 | |
c07f9fc5 FG |
597 | |
598 | /** | |
599 | * find tree roots that contain shadow (device class) items only | |
600 | */ | |
3efd9988 | 601 | void find_shadow_roots(set<int> *roots) const { |
c07f9fc5 | 602 | set<int> all; |
3efd9988 | 603 | find_roots(&all); |
c07f9fc5 FG |
604 | for (auto& p: all) { |
605 | if (is_shadow_item(p)) { | |
3efd9988 | 606 | roots->insert(p); |
c07f9fc5 FG |
607 | } |
608 | } | |
609 | } | |
610 | ||
224ce89b WB |
611 | /** |
612 | * find tree roots that are not shadow (device class) items | |
613 | * | |
614 | * These are parentless nodes in the map that are not shadow | |
615 | * items for device classes. | |
616 | */ | |
3efd9988 | 617 | void find_nonshadow_roots(set<int> *roots) const { |
c07f9fc5 | 618 | set<int> all; |
3efd9988 | 619 | find_roots(&all); |
c07f9fc5 FG |
620 | for (auto& p: all) { |
621 | if (!is_shadow_item(p)) { | |
3efd9988 | 622 | roots->insert(p); |
c07f9fc5 FG |
623 | } |
624 | } | |
625 | } | |
224ce89b | 626 | |
7c673cae FG |
627 | /** |
628 | * see if an item is contained within a subtree | |
629 | * | |
630 | * @param root haystack | |
631 | * @param item needle | |
632 | * @return true if the item is located beneath the given node | |
633 | */ | |
634 | bool subtree_contains(int root, int item) const; | |
635 | ||
636 | private: | |
637 | /** | |
638 | * search for an item in any bucket | |
639 | * | |
640 | * @param i item | |
641 | * @return true if present | |
642 | */ | |
643 | bool _search_item_exists(int i) const; | |
644 | public: | |
645 | ||
646 | /** | |
647 | * see if item is located where we think it is | |
648 | * | |
649 | * This verifies that the given item is located at a particular | |
650 | * location in the hierarchy. However, that check is imprecise; we | |
651 | * are actually verifying that the most specific location key/value | |
652 | * is correct. For example, if loc specifies that rack=foo and | |
653 | * host=bar, it will verify that host=bar is correct; any placement | |
654 | * above that level in the hierarchy is ignored. This matches the | |
655 | * semantics for insert_item(). | |
656 | * | |
657 | * @param cct cct | |
658 | * @param item item id | |
659 | * @param loc location to check (map of type to bucket names) | |
660 | * @param weight optional pointer to weight of item at that location | |
661 | * @return true if item is at specified location | |
662 | */ | |
663 | bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, int *iweight); | |
664 | bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, float *weight) { | |
665 | int iweight; | |
666 | bool ret = check_item_loc(cct, item, loc, &iweight); | |
667 | if (weight) | |
668 | *weight = (float)iweight / (float)0x10000; | |
669 | return ret; | |
670 | } | |
671 | ||
672 | ||
673 | /** | |
674 | * returns the (type, name) of the parent bucket of id | |
675 | * | |
676 | * FIXME: ambiguous for items that occur multiple times in the map | |
677 | */ | |
678 | pair<string,string> get_immediate_parent(int id, int *ret = NULL); | |
c07f9fc5 | 679 | |
7c673cae FG |
680 | int get_immediate_parent_id(int id, int *parent) const; |
681 | ||
31f18b77 FG |
682 | /** |
683 | * return ancestor of the given type, or 0 if none | |
684 | * (parent is always a bucket and thus <0) | |
685 | */ | |
686 | int get_parent_of_type(int id, int type) const; | |
687 | ||
7c673cae FG |
688 | /** |
689 | * get the fully qualified location of a device by successively finding | |
690 | * parents beginning at ID and ending at highest type number specified in | |
691 | * the CRUSH map which assumes that if device foo is under device bar, the | |
692 | * type_id of foo < bar where type_id is the integer specified in the CRUSH map | |
693 | * | |
694 | * returns the location in the form of (type=foo) where type is a type of bucket | |
695 | * specified in the CRUSH map and foo is a name specified in the CRUSH map | |
696 | */ | |
697 | map<string, string> get_full_location(int id); | |
698 | ||
699 | /* | |
700 | * identical to get_full_location(int id) although it returns the type/name | |
701 | * pairs in the order they occur in the hierarchy. | |
702 | * | |
703 | * returns -ENOENT if id is not found. | |
704 | */ | |
705 | int get_full_location_ordered(int id, vector<pair<string, string> >& path); | |
706 | ||
31f18b77 FG |
707 | /* |
708 | * identical to get_full_location_ordered(int id, vector<pair<string, string> >& path), | |
709 | * although it returns a concatenated string with the type/name pairs in descending | |
710 | * hierarchical order with format key1=val1,key2=val2. | |
711 | * | |
712 | * returns the location in descending hierarchy as a string. | |
713 | */ | |
714 | string get_full_location_ordered_string(int id); | |
715 | ||
7c673cae FG |
716 | /** |
717 | * returns (type_id, type) of all parent buckets between id and | |
718 | * default, can be used to check for anomolous CRUSH maps | |
719 | */ | |
720 | map<int, string> get_parent_hierarchy(int id); | |
721 | ||
722 | /** | |
723 | * enumerate immediate children of given node | |
724 | * | |
725 | * @param id parent bucket or device id | |
726 | * @return number of items, or error | |
727 | */ | |
728 | int get_children(int id, list<int> *children); | |
729 | ||
31f18b77 FG |
730 | /** |
731 | * enumerate leaves(devices) of given node | |
732 | * | |
733 | * @param name parent bucket name | |
734 | * @return 0 on success or a negative errno on error. | |
735 | */ | |
736 | int get_leaves(const string &name, set<int> *leaves); | |
737 | int _get_leaves(int id, list<int> *leaves); // worker | |
738 | ||
7c673cae FG |
739 | /** |
740 | * insert an item into the map at a specific position | |
741 | * | |
742 | * Add an item as a specific location of the hierarchy. | |
743 | * Specifically, we look for the most specific location constraint | |
744 | * for which a bucket already exists, and then create intervening | |
745 | * buckets beneath that in order to place the item. | |
746 | * | |
747 | * Note that any location specifiers *above* the most specific match | |
748 | * are ignored. For example, if we specify that osd.12 goes in | |
749 | * host=foo, rack=bar, and row=baz, and rack=bar is the most | |
750 | * specific match, we will create host=foo beneath that point and | |
751 | * put osd.12 inside it. However, we will not verify that rack=bar | |
752 | * is beneath row=baz or move it. | |
753 | * | |
754 | * In short, we will build out a hierarchy, and move leaves around, | |
755 | * but not adjust the hierarchy's internal structure. Yet. | |
756 | * | |
757 | * If the item is already present in the map, we will return EEXIST. | |
758 | * If the location key/value pairs are nonsensical | |
759 | * (rack=nameofdevice), or location specifies that do not attach us | |
760 | * to any existing part of the hierarchy, we will return EINVAL. | |
761 | * | |
762 | * @param cct cct | |
763 | * @param id item id | |
764 | * @param weight item weight | |
765 | * @param name item name | |
766 | * @param loc location (map of type to bucket names) | |
767 | * @return 0 for success, negative on error | |
768 | */ | |
769 | int insert_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc); | |
770 | ||
771 | /** | |
772 | * move a bucket in the hierarchy to the given location | |
773 | * | |
774 | * This has the same location and ancestor creation behavior as | |
775 | * insert_item(), but will relocate the specified existing bucket. | |
776 | * | |
777 | * @param cct cct | |
778 | * @param id bucket id | |
779 | * @param loc location (map of type to bucket names) | |
780 | * @return 0 for success, negative on error | |
781 | */ | |
782 | int move_bucket(CephContext *cct, int id, const map<string,string>& loc); | |
783 | ||
31f18b77 FG |
784 | /** |
785 | * swap bucket contents of two buckets without touching bucket ids | |
786 | * | |
787 | * @param cct cct | |
788 | * @param src bucket a | |
789 | * @param dst bucket b | |
790 | * @return 0 for success, negative on error | |
791 | */ | |
792 | int swap_bucket(CephContext *cct, int src, int dst); | |
793 | ||
7c673cae FG |
794 | /** |
795 | * add a link to an existing bucket in the hierarchy to the new location | |
796 | * | |
797 | * This has the same location and ancestor creation behavior as | |
798 | * insert_item(), but will add a new link to the specified existing | |
799 | * bucket. | |
800 | * | |
801 | * @param cct cct | |
802 | * @param id bucket id | |
803 | * @param loc location (map of type to bucket names) | |
804 | * @return 0 for success, negative on error | |
805 | */ | |
806 | int link_bucket(CephContext *cct, int id, const map<string,string>& loc); | |
807 | ||
808 | /** | |
809 | * add or update an item's position in the map | |
810 | * | |
811 | * This is analogous to insert_item, except we will move an item if | |
812 | * it is already present. | |
813 | * | |
814 | * @param cct cct | |
815 | * @param id item id | |
816 | * @param weight item weight | |
817 | * @param name item name | |
818 | * @param loc location (map of type to bucket names) | |
819 | * @return 0 for no change, 1 for successful change, negative on error | |
820 | */ | |
821 | int update_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc); | |
822 | ||
823 | /** | |
824 | * create or move an item, but do not adjust its weight if it already exists | |
825 | * | |
826 | * @param cct cct | |
827 | * @param item item id | |
828 | * @param weight initial item weight (if we need to create it) | |
829 | * @param name item name | |
830 | * @param loc location (map of type to bucket names) | |
831 | * @return 0 for no change, 1 for successful change, negative on error | |
832 | */ | |
833 | int create_or_move_item(CephContext *cct, int item, float weight, string name, | |
834 | const map<string,string>& loc); | |
835 | ||
836 | /** | |
837 | * remove all instances of an item from the map | |
838 | * | |
839 | * @param cct cct | |
840 | * @param id item id to remove | |
841 | * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty) | |
842 | * @return 0 on success, negative on error | |
843 | */ | |
844 | int remove_item(CephContext *cct, int id, bool unlink_only); | |
845 | ||
846 | /** | |
847 | * recursively remove buckets starting at item and stop removing | |
848 | * when a bucket is in use. | |
849 | * | |
850 | * @param item id to remove | |
7c673cae FG |
851 | * @return 0 on success, negative on error |
852 | */ | |
35e4c445 | 853 | int remove_root(int item); |
7c673cae FG |
854 | |
855 | /** | |
856 | * remove all instances of an item nested beneath a certain point from the map | |
857 | * | |
858 | * @param cct cct | |
859 | * @param id item id to remove | |
860 | * @param ancestor ancestor item id under which to search for id | |
861 | * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty) | |
862 | * @return 0 on success, negative on error | |
863 | */ | |
864 | private: | |
865 | bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only); | |
866 | int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only); | |
867 | bool _bucket_is_in_use(int id); | |
868 | public: | |
869 | int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only); | |
870 | ||
871 | /** | |
872 | * calculate the locality/distance from a given id to a crush location map | |
873 | * | |
874 | * Specifically, we look for the lowest-valued type for which the | |
875 | * location of id matches that described in loc. | |
876 | * | |
877 | * @param cct cct | |
878 | * @param id the existing id in the map | |
879 | * @param loc a set of key=value pairs describing a location in the hierarchy | |
880 | */ | |
881 | int get_common_ancestor_distance(CephContext *cct, int id, | |
882 | const std::multimap<string,string>& loc); | |
883 | ||
884 | /** | |
885 | * parse a set of key/value pairs out of a string vector | |
886 | * | |
887 | * These are used to describe a location in the CRUSH hierarchy. | |
888 | * | |
889 | * @param args list of strings (each key= or key=value) | |
890 | * @param ploc pointer to a resulting location map or multimap | |
891 | */ | |
892 | static int parse_loc_map(const std::vector<string>& args, | |
893 | std::map<string,string> *ploc); | |
894 | static int parse_loc_multimap(const std::vector<string>& args, | |
895 | std::multimap<string,string> *ploc); | |
896 | ||
897 | /** | |
898 | * get an item's weight | |
899 | * | |
900 | * Will return the weight for the first instance it finds. | |
901 | * | |
902 | * @param id item id to check | |
903 | * @return weight of item | |
904 | */ | |
905 | int get_item_weight(int id) const; | |
906 | float get_item_weightf(int id) const { | |
907 | return (float)get_item_weight(id) / (float)0x10000; | |
908 | } | |
909 | int get_item_weight_in_loc(int id, const map<string,string> &loc); | |
910 | float get_item_weightf_in_loc(int id, const map<string,string> &loc) { | |
911 | return (float)get_item_weight_in_loc(id, loc) / (float)0x10000; | |
912 | } | |
913 | ||
224ce89b WB |
914 | int validate_weightf(float weight) { |
915 | uint64_t iweight = weight * 0x10000; | |
916 | if (iweight > std::numeric_limits<int>::max()) { | |
917 | return -EOVERFLOW; | |
918 | } | |
919 | return 0; | |
920 | } | |
7c673cae FG |
921 | int adjust_item_weight(CephContext *cct, int id, int weight); |
922 | int adjust_item_weightf(CephContext *cct, int id, float weight) { | |
224ce89b WB |
923 | int r = validate_weightf(weight); |
924 | if (r < 0) { | |
925 | return r; | |
926 | } | |
7c673cae FG |
927 | return adjust_item_weight(cct, id, (int)(weight * (float)0x10000)); |
928 | } | |
929 | int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc); | |
930 | int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map<string,string>& loc) { | |
224ce89b WB |
931 | int r = validate_weightf(weight); |
932 | if (r < 0) { | |
933 | return r; | |
934 | } | |
7c673cae FG |
935 | return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc); |
936 | } | |
937 | void reweight(CephContext *cct); | |
938 | ||
939 | int adjust_subtree_weight(CephContext *cct, int id, int weight); | |
940 | int adjust_subtree_weightf(CephContext *cct, int id, float weight) { | |
224ce89b WB |
941 | int r = validate_weightf(weight); |
942 | if (r < 0) { | |
943 | return r; | |
944 | } | |
7c673cae FG |
945 | return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000)); |
946 | } | |
947 | ||
948 | /// check if item id is present in the map hierarchy | |
949 | bool check_item_present(int id) const; | |
950 | ||
951 | ||
952 | /*** devices ***/ | |
953 | int get_max_devices() const { | |
954 | if (!crush) return 0; | |
955 | return crush->max_devices; | |
956 | } | |
957 | ||
958 | ||
959 | /*** rules ***/ | |
960 | private: | |
961 | crush_rule *get_rule(unsigned ruleno) const { | |
962 | if (!crush) return (crush_rule *)(-ENOENT); | |
963 | if (ruleno >= crush->max_rules) | |
964 | return 0; | |
965 | return crush->rules[ruleno]; | |
966 | } | |
967 | crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const { | |
968 | crush_rule *n = get_rule(ruleno); | |
969 | if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL); | |
970 | if (step >= n->len) return (crush_rule_step *)(-EINVAL); | |
971 | return &n->steps[step]; | |
972 | } | |
973 | ||
974 | public: | |
975 | /* accessors */ | |
976 | int get_max_rules() const { | |
977 | if (!crush) return 0; | |
978 | return crush->max_rules; | |
979 | } | |
980 | bool rule_exists(unsigned ruleno) const { | |
981 | if (!crush) return false; | |
982 | if (ruleno < crush->max_rules && | |
983 | crush->rules[ruleno] != NULL) | |
984 | return true; | |
985 | return false; | |
986 | } | |
3efd9988 FG |
987 | bool rule_has_take(unsigned ruleno, int take) const { |
988 | if (!crush) return false; | |
989 | crush_rule *rule = get_rule(ruleno); | |
990 | for (unsigned i = 0; i < rule->len; ++i) { | |
991 | if (rule->steps[i].op == CRUSH_RULE_TAKE && | |
992 | rule->steps[i].arg1 == take) { | |
993 | return true; | |
994 | } | |
995 | } | |
996 | return false; | |
997 | } | |
7c673cae FG |
998 | int get_rule_len(unsigned ruleno) const { |
999 | crush_rule *r = get_rule(ruleno); | |
1000 | if (IS_ERR(r)) return PTR_ERR(r); | |
1001 | return r->len; | |
1002 | } | |
1003 | int get_rule_mask_ruleset(unsigned ruleno) const { | |
1004 | crush_rule *r = get_rule(ruleno); | |
1005 | if (IS_ERR(r)) return -1; | |
1006 | return r->mask.ruleset; | |
1007 | } | |
1008 | int get_rule_mask_type(unsigned ruleno) const { | |
1009 | crush_rule *r = get_rule(ruleno); | |
1010 | if (IS_ERR(r)) return -1; | |
1011 | return r->mask.type; | |
1012 | } | |
1013 | int get_rule_mask_min_size(unsigned ruleno) const { | |
1014 | crush_rule *r = get_rule(ruleno); | |
1015 | if (IS_ERR(r)) return -1; | |
1016 | return r->mask.min_size; | |
1017 | } | |
1018 | int get_rule_mask_max_size(unsigned ruleno) const { | |
1019 | crush_rule *r = get_rule(ruleno); | |
1020 | if (IS_ERR(r)) return -1; | |
1021 | return r->mask.max_size; | |
1022 | } | |
1023 | int get_rule_op(unsigned ruleno, unsigned step) const { | |
1024 | crush_rule_step *s = get_rule_step(ruleno, step); | |
1025 | if (IS_ERR(s)) return PTR_ERR(s); | |
1026 | return s->op; | |
1027 | } | |
1028 | int get_rule_arg1(unsigned ruleno, unsigned step) const { | |
1029 | crush_rule_step *s = get_rule_step(ruleno, step); | |
1030 | if (IS_ERR(s)) return PTR_ERR(s); | |
1031 | return s->arg1; | |
1032 | } | |
1033 | int get_rule_arg2(unsigned ruleno, unsigned step) const { | |
1034 | crush_rule_step *s = get_rule_step(ruleno, step); | |
1035 | if (IS_ERR(s)) return PTR_ERR(s); | |
1036 | return s->arg2; | |
1037 | } | |
1038 | ||
3efd9988 FG |
1039 | private: |
1040 | float _get_take_weight_osd_map(int root, map<int,float> *pmap) const; | |
1041 | void _normalize_weight_map(float sum, const map<int,float>& m, | |
1042 | map<int,float> *pmap) const; | |
1043 | ||
1044 | public: | |
7c673cae FG |
1045 | /** |
1046 | * calculate a map of osds to weights for a given rule | |
1047 | * | |
1048 | * Generate a map of which OSDs get how much relative weight for a | |
1049 | * given rule. | |
1050 | * | |
1051 | * @param ruleno [in] rule id | |
1052 | * @param pmap [out] map of osd to weight | |
1053 | * @return 0 for success, or negative error code | |
1054 | */ | |
3efd9988 FG |
1055 | int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap) const; |
1056 | ||
1057 | /** | |
1058 | * calculate a map of osds to weights for a given starting root | |
1059 | * | |
1060 | * Generate a map of which OSDs get how much relative weight for a | |
1061 | * given starting root | |
1062 | * | |
1063 | * @param root node | |
1064 | * @param pmap [out] map of osd to weight | |
1065 | * @return 0 for success, or negative error code | |
1066 | */ | |
1067 | int get_take_weight_osd_map(int root, map<int,float> *pmap) const; | |
7c673cae FG |
1068 | |
1069 | /* modifiers */ | |
c07f9fc5 FG |
1070 | |
1071 | int add_rule(int ruleno, int len, int type, int minsize, int maxsize) { | |
7c673cae | 1072 | if (!crush) return -ENOENT; |
c07f9fc5 | 1073 | crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize); |
7c673cae FG |
1074 | assert(n); |
1075 | ruleno = crush_add_rule(crush, n, ruleno); | |
1076 | return ruleno; | |
1077 | } | |
1078 | int set_rule_mask_max_size(unsigned ruleno, int max_size) { | |
1079 | crush_rule *r = get_rule(ruleno); | |
1080 | if (IS_ERR(r)) return -1; | |
1081 | return r->mask.max_size = max_size; | |
1082 | } | |
1083 | int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) { | |
1084 | if (!crush) return -ENOENT; | |
1085 | crush_rule *n = get_rule(ruleno); | |
1086 | if (!n) return -1; | |
1087 | crush_rule_set_step(n, step, op, arg1, arg2); | |
1088 | return 0; | |
1089 | } | |
1090 | int set_rule_step_take(unsigned ruleno, unsigned step, int val) { | |
1091 | return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0); | |
1092 | } | |
1093 | int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) { | |
1094 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0); | |
1095 | } | |
1096 | int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) { | |
1097 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0); | |
1098 | } | |
1099 | int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) { | |
1100 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0); | |
1101 | } | |
1102 | int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) { | |
1103 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0); | |
1104 | } | |
1105 | int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) { | |
1106 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0); | |
1107 | } | |
1108 | int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) { | |
1109 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0); | |
1110 | } | |
1111 | int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) { | |
1112 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type); | |
1113 | } | |
1114 | int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) { | |
1115 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type); | |
1116 | } | |
1117 | int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) { | |
1118 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type); | |
1119 | } | |
1120 | int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) { | |
1121 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type); | |
1122 | } | |
1123 | int set_rule_step_emit(unsigned ruleno, unsigned step) { | |
1124 | return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0); | |
1125 | } | |
1126 | ||
31f18b77 FG |
1127 | int add_simple_rule( |
1128 | string name, string root_name, string failure_domain_type, | |
224ce89b | 1129 | string device_class, |
31f18b77 FG |
1130 | string mode, int rule_type, ostream *err = 0); |
1131 | ||
7c673cae | 1132 | /** |
31f18b77 | 1133 | * @param rno rule[set] id to use, -1 to pick the lowest available |
7c673cae | 1134 | */ |
31f18b77 FG |
1135 | int add_simple_rule_at( |
1136 | string name, string root_name, | |
224ce89b | 1137 | string failure_domain_type, string device_class, string mode, |
31f18b77 | 1138 | int rule_type, int rno, ostream *err = 0); |
7c673cae FG |
1139 | |
1140 | int remove_rule(int ruleno); | |
1141 | ||
1142 | ||
1143 | /** buckets **/ | |
7c673cae FG |
1144 | const crush_bucket *get_bucket(int id) const { |
1145 | if (!crush) | |
1146 | return (crush_bucket *)(-EINVAL); | |
1147 | unsigned int pos = (unsigned int)(-1 - id); | |
1148 | unsigned int max_buckets = crush->max_buckets; | |
1149 | if (pos >= max_buckets) | |
1150 | return (crush_bucket *)(-ENOENT); | |
1151 | crush_bucket *ret = crush->buckets[pos]; | |
1152 | if (ret == NULL) | |
1153 | return (crush_bucket *)(-ENOENT); | |
1154 | return ret; | |
1155 | } | |
c07f9fc5 | 1156 | private: |
7c673cae FG |
1157 | crush_bucket *get_bucket(int id) { |
1158 | if (!crush) | |
1159 | return (crush_bucket *)(-EINVAL); | |
1160 | unsigned int pos = (unsigned int)(-1 - id); | |
1161 | unsigned int max_buckets = crush->max_buckets; | |
1162 | if (pos >= max_buckets) | |
1163 | return (crush_bucket *)(-ENOENT); | |
1164 | crush_bucket *ret = crush->buckets[pos]; | |
1165 | if (ret == NULL) | |
1166 | return (crush_bucket *)(-ENOENT); | |
1167 | return ret; | |
1168 | } | |
1169 | /** | |
1170 | * detach a bucket from its parent and adjust the parent weight | |
1171 | * | |
1172 | * returns the weight of the detached bucket | |
1173 | **/ | |
c07f9fc5 | 1174 | int detach_bucket(CephContext *cct, int item); |
7c673cae FG |
1175 | |
1176 | public: | |
1177 | int get_max_buckets() const { | |
1178 | if (!crush) return -EINVAL; | |
1179 | return crush->max_buckets; | |
1180 | } | |
1181 | int get_next_bucket_id() const { | |
1182 | if (!crush) return -EINVAL; | |
1183 | return crush_get_next_bucket_id(crush); | |
1184 | } | |
1185 | bool bucket_exists(int id) const { | |
1186 | const crush_bucket *b = get_bucket(id); | |
1187 | if (IS_ERR(b)) | |
1188 | return false; | |
1189 | return true; | |
1190 | } | |
1191 | int get_bucket_weight(int id) const { | |
1192 | const crush_bucket *b = get_bucket(id); | |
1193 | if (IS_ERR(b)) return PTR_ERR(b); | |
1194 | return b->weight; | |
1195 | } | |
1196 | float get_bucket_weightf(int id) const { | |
1197 | const crush_bucket *b = get_bucket(id); | |
1198 | if (IS_ERR(b)) return 0; | |
1199 | return b->weight / (float)0x10000; | |
1200 | } | |
1201 | int get_bucket_type(int id) const { | |
1202 | const crush_bucket *b = get_bucket(id); | |
1203 | if (IS_ERR(b)) return PTR_ERR(b); | |
1204 | return b->type; | |
1205 | } | |
1206 | int get_bucket_alg(int id) const { | |
1207 | const crush_bucket *b = get_bucket(id); | |
1208 | if (IS_ERR(b)) return PTR_ERR(b); | |
1209 | return b->alg; | |
1210 | } | |
1211 | int get_bucket_hash(int id) const { | |
1212 | const crush_bucket *b = get_bucket(id); | |
1213 | if (IS_ERR(b)) return PTR_ERR(b); | |
1214 | return b->hash; | |
1215 | } | |
1216 | int get_bucket_size(int id) const { | |
1217 | const crush_bucket *b = get_bucket(id); | |
1218 | if (IS_ERR(b)) return PTR_ERR(b); | |
1219 | return b->size; | |
1220 | } | |
1221 | int get_bucket_item(int id, int pos) const { | |
1222 | const crush_bucket *b = get_bucket(id); | |
1223 | if (IS_ERR(b)) return PTR_ERR(b); | |
1224 | if ((__u32)pos >= b->size) | |
1225 | return PTR_ERR(b); | |
1226 | return b->items[pos]; | |
1227 | } | |
1228 | int get_bucket_item_weight(int id, int pos) const { | |
1229 | const crush_bucket *b = get_bucket(id); | |
1230 | if (IS_ERR(b)) return PTR_ERR(b); | |
1231 | return crush_get_bucket_item_weight(b, pos); | |
1232 | } | |
1233 | float get_bucket_item_weightf(int id, int pos) const { | |
1234 | const crush_bucket *b = get_bucket(id); | |
1235 | if (IS_ERR(b)) return 0; | |
1236 | return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000; | |
1237 | } | |
1238 | ||
1239 | /* modifiers */ | |
1240 | int add_bucket(int bucketno, int alg, int hash, int type, int size, | |
c07f9fc5 | 1241 | int *items, int *weights, int *idout); |
31f18b77 FG |
1242 | int bucket_add_item(crush_bucket *bucket, int item, int weight); |
1243 | int bucket_remove_item(struct crush_bucket *bucket, int item); | |
1244 | int bucket_adjust_item_weight(CephContext *cct, struct crush_bucket *bucket, int item, int weight); | |
1245 | ||
7c673cae FG |
1246 | void finalize() { |
1247 | assert(crush); | |
1248 | crush_finalize(crush); | |
3efd9988 | 1249 | have_uniform_rules = !has_legacy_rule_ids(); |
7c673cae | 1250 | } |
3efd9988 | 1251 | int bucket_set_alg(int id, int alg); |
7c673cae | 1252 | |
224ce89b | 1253 | int update_device_class(int id, const string& class_name, const string& name, ostream *ss); |
c07f9fc5 | 1254 | int remove_device_class(CephContext *cct, int id, ostream *ss); |
d2e6a577 FG |
1255 | int device_class_clone( |
1256 | int original, int device_class, | |
1257 | const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket, | |
1258 | const std::set<int32_t>& used_ids, | |
35e4c445 FG |
1259 | int *clone, |
1260 | map<int,map<int,vector<int>>> *cmap_item_weight); | |
1261 | int rename_class(const string& srcname, const string& dstname); | |
d2e6a577 FG |
1262 | int populate_classes( |
1263 | const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket); | |
b5b8bbf5 | 1264 | int get_rules_by_class(const string &class_name, set<int> *rules); |
3efd9988 | 1265 | int get_rules_by_osd(int osd, set<int> *rules); |
d2e6a577 FG |
1266 | bool _class_is_dead(int class_id); |
1267 | void cleanup_dead_classes(); | |
7c673cae FG |
1268 | int rebuild_roots_with_classes(); |
1269 | /* remove unused roots generated for class devices */ | |
35e4c445 | 1270 | int trim_roots_with_class(); |
7c673cae FG |
1271 | |
1272 | void start_choose_profile() { | |
1273 | free(crush->choose_tries); | |
1274 | /* | |
1275 | * the original choose_total_tries value was off by one (it | |
1276 | * counted "retries" and not "tries"). add one to alloc. | |
1277 | */ | |
c07f9fc5 FG |
1278 | crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries), |
1279 | (crush->choose_total_tries + 1)); | |
7c673cae FG |
1280 | memset(crush->choose_tries, 0, |
1281 | sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1)); | |
1282 | } | |
1283 | void stop_choose_profile() { | |
1284 | free(crush->choose_tries); | |
1285 | crush->choose_tries = 0; | |
1286 | } | |
1287 | ||
1288 | int get_choose_profile(__u32 **vec) { | |
1289 | if (crush->choose_tries) { | |
1290 | *vec = crush->choose_tries; | |
1291 | return crush->choose_total_tries; | |
1292 | } | |
1293 | return 0; | |
1294 | } | |
1295 | ||
1296 | ||
1297 | void set_max_devices(int m) { | |
1298 | crush->max_devices = m; | |
1299 | } | |
1300 | ||
1301 | int find_rule(int ruleset, int type, int size) const { | |
1302 | if (!crush) return -1; | |
181888fb FG |
1303 | if (have_uniform_rules && |
1304 | ruleset < (int)crush->max_rules && | |
1305 | crush->rules[ruleset] && | |
1306 | crush->rules[ruleset]->mask.type == type && | |
1307 | crush->rules[ruleset]->mask.min_size <= size && | |
1308 | crush->rules[ruleset]->mask.max_size >= size) { | |
1309 | return ruleset; | |
31f18b77 | 1310 | } |
181888fb | 1311 | return crush_find_rule(crush, ruleset, type, size); |
7c673cae FG |
1312 | } |
1313 | ||
d2e6a577 | 1314 | bool ruleset_exists(const int ruleset) const { |
7c673cae FG |
1315 | for (size_t i = 0; i < crush->max_rules; ++i) { |
1316 | if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) { | |
1317 | return true; | |
1318 | } | |
1319 | } | |
1320 | ||
1321 | return false; | |
1322 | } | |
1323 | ||
1324 | /** | |
1325 | * Return the lowest numbered ruleset of type `type` | |
1326 | * | |
3efd9988 | 1327 | * @returns a ruleset ID, or -1 if no matching rules found. |
7c673cae FG |
1328 | */ |
1329 | int find_first_ruleset(int type) const { | |
1330 | int result = -1; | |
1331 | ||
1332 | for (size_t i = 0; i < crush->max_rules; ++i) { | |
1333 | if (crush->rules[i] | |
1334 | && crush->rules[i]->mask.type == type | |
1335 | && (crush->rules[i]->mask.ruleset < result || result == -1)) { | |
1336 | result = crush->rules[i]->mask.ruleset; | |
1337 | } | |
1338 | } | |
1339 | ||
1340 | return result; | |
1341 | } | |
1342 | ||
c07f9fc5 FG |
1343 | bool have_choose_args(int64_t choose_args_index) const { |
1344 | return choose_args.count(choose_args_index); | |
1345 | } | |
1346 | ||
1347 | crush_choose_arg_map choose_args_get_with_fallback( | |
1348 | int64_t choose_args_index) const { | |
1349 | auto i = choose_args.find(choose_args_index); | |
1350 | if (i == choose_args.end()) { | |
1351 | i = choose_args.find(DEFAULT_CHOOSE_ARGS); | |
1352 | } | |
1353 | if (i == choose_args.end()) { | |
1354 | crush_choose_arg_map arg_map; | |
1355 | arg_map.args = NULL; | |
1356 | arg_map.size = 0; | |
1357 | return arg_map; | |
1358 | } else { | |
1359 | return i->second; | |
1360 | } | |
1361 | } | |
1362 | crush_choose_arg_map choose_args_get(int64_t choose_args_index) const { | |
7c673cae FG |
1363 | auto i = choose_args.find(choose_args_index); |
1364 | if (i == choose_args.end()) { | |
1365 | crush_choose_arg_map arg_map; | |
1366 | arg_map.args = NULL; | |
1367 | arg_map.size = 0; | |
1368 | return arg_map; | |
1369 | } else { | |
1370 | return i->second; | |
1371 | } | |
1372 | } | |
1373 | ||
1374 | void destroy_choose_args(crush_choose_arg_map arg_map) { | |
1375 | for (__u32 i = 0; i < arg_map.size; i++) { | |
1376 | crush_choose_arg *arg = &arg_map.args[i]; | |
1377 | for (__u32 j = 0; j < arg->weight_set_size; j++) { | |
1378 | crush_weight_set *weight_set = &arg->weight_set[j]; | |
1379 | free(weight_set->weights); | |
1380 | } | |
1381 | if (arg->weight_set) | |
1382 | free(arg->weight_set); | |
1383 | if (arg->ids) | |
1384 | free(arg->ids); | |
1385 | } | |
1386 | free(arg_map.args); | |
1387 | } | |
c07f9fc5 FG |
1388 | |
1389 | void create_choose_args(int64_t id, int positions) { | |
1390 | if (choose_args.count(id)) | |
1391 | return; | |
1392 | assert(positions); | |
1393 | auto &cmap = choose_args[id]; | |
1394 | cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg), | |
1395 | crush->max_buckets); | |
1396 | cmap.size = crush->max_buckets; | |
1397 | for (int bidx=0; bidx < crush->max_buckets; ++bidx) { | |
1398 | crush_bucket *b = crush->buckets[bidx]; | |
1399 | auto &carg = cmap.args[bidx]; | |
1400 | carg.ids = NULL; | |
1401 | carg.ids_size = 0; | |
1402 | if (b && b->alg == CRUSH_BUCKET_STRAW2) { | |
1403 | crush_bucket_straw2 *sb = (crush_bucket_straw2*)b; | |
1404 | carg.weight_set_size = positions; | |
1405 | carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set), | |
1406 | carg.weight_set_size); | |
1407 | // initialize with canonical weights | |
1408 | for (int pos = 0; pos < positions; ++pos) { | |
1409 | carg.weight_set[pos].size = b->size; | |
1410 | carg.weight_set[pos].weights = (__u32*)calloc(4, b->size); | |
1411 | for (unsigned i = 0; i < b->size; ++i) { | |
1412 | carg.weight_set[pos].weights[i] = sb->item_weights[i]; | |
1413 | } | |
1414 | } | |
1415 | } else { | |
1416 | carg.weight_set = NULL; | |
1417 | carg.weight_set_size = 0; | |
1418 | } | |
1419 | } | |
1420 | } | |
1421 | ||
1422 | void rm_choose_args(int64_t id) { | |
1423 | auto p = choose_args.find(id); | |
1424 | if (p != choose_args.end()) { | |
1425 | destroy_choose_args(p->second); | |
1426 | choose_args.erase(p); | |
1427 | } | |
1428 | } | |
1429 | ||
7c673cae FG |
1430 | void choose_args_clear() { |
1431 | for (auto w : choose_args) | |
1432 | destroy_choose_args(w.second); | |
1433 | choose_args.clear(); | |
1434 | } | |
1435 | ||
c07f9fc5 FG |
1436 | // adjust choose_args_map weight, preserving the hierarchical summation |
1437 | // property. used by callers optimizing layouts by tweaking weights. | |
1438 | int _choose_args_adjust_item_weight_in_bucket( | |
1439 | CephContext *cct, | |
1440 | crush_choose_arg_map cmap, | |
1441 | int bucketid, | |
1442 | int id, | |
1443 | const vector<int>& weight, | |
1444 | ostream *ss); | |
1445 | int choose_args_adjust_item_weight( | |
1446 | CephContext *cct, | |
1447 | crush_choose_arg_map cmap, | |
1448 | int id, const vector<int>& weight, | |
1449 | ostream *ss); | |
1450 | int choose_args_adjust_item_weightf( | |
1451 | CephContext *cct, | |
1452 | crush_choose_arg_map cmap, | |
1453 | int id, const vector<double>& weightf, | |
1454 | ostream *ss) { | |
1455 | vector<int> weight(weightf.size()); | |
1456 | for (unsigned i = 0; i < weightf.size(); ++i) { | |
1457 | weight[i] = (int)(weightf[i] * (float)0x10000); | |
1458 | } | |
1459 | return choose_args_adjust_item_weight(cct, cmap, id, weight, ss); | |
1460 | } | |
1461 | ||
1462 | int get_choose_args_positions(crush_choose_arg_map cmap) { | |
1463 | // infer positions from other buckets | |
1464 | for (unsigned j = 0; j < cmap.size; ++j) { | |
1465 | if (cmap.args[j].weight_set_size) { | |
1466 | return cmap.args[j].weight_set_size; | |
1467 | } | |
1468 | } | |
1469 | return 1; | |
1470 | } | |
1471 | ||
7c673cae FG |
1472 | template<typename WeightVector> |
1473 | void do_rule(int rule, int x, vector<int>& out, int maxout, | |
1474 | const WeightVector& weight, | |
1475 | uint64_t choose_args_index) const { | |
1476 | int rawout[maxout]; | |
1477 | char work[crush_work_size(crush, maxout)]; | |
1478 | crush_init_workspace(crush, work); | |
c07f9fc5 FG |
1479 | crush_choose_arg_map arg_map = choose_args_get_with_fallback( |
1480 | choose_args_index); | |
7c673cae FG |
1481 | int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0], |
1482 | weight.size(), work, arg_map.args); | |
1483 | if (numrep < 0) | |
1484 | numrep = 0; | |
1485 | out.resize(numrep); | |
1486 | for (int i=0; i<numrep; i++) | |
1487 | out[i] = rawout[i]; | |
1488 | } | |
1489 | ||
1490 | int _choose_type_stack( | |
1491 | CephContext *cct, | |
1492 | const vector<pair<int,int>>& stack, | |
1493 | const set<int>& overfull, | |
1494 | const vector<int>& underfull, | |
1495 | const vector<int>& orig, | |
1496 | vector<int>::const_iterator& i, | |
1497 | set<int>& used, | |
1498 | vector<int> *pw) const; | |
1499 | ||
1500 | int try_remap_rule( | |
1501 | CephContext *cct, | |
1502 | int rule, | |
1503 | int maxout, | |
1504 | const set<int>& overfull, | |
1505 | const vector<int>& underfull, | |
1506 | const vector<int>& orig, | |
1507 | vector<int> *out) const; | |
1508 | ||
1509 | bool check_crush_rule(int ruleset, int type, int size, ostream& ss) { | |
1510 | assert(crush); | |
1511 | ||
1512 | __u32 i; | |
1513 | for (i = 0; i < crush->max_rules; i++) { | |
1514 | if (crush->rules[i] && | |
1515 | crush->rules[i]->mask.ruleset == ruleset && | |
1516 | crush->rules[i]->mask.type == type) { | |
1517 | ||
1518 | if (crush->rules[i]->mask.min_size <= size && | |
1519 | crush->rules[i]->mask.max_size >= size) { | |
1520 | return true; | |
1521 | } else if (size < crush->rules[i]->mask.min_size) { | |
1522 | ss << "pool size is smaller than the crush rule min size"; | |
1523 | return false; | |
1524 | } else { | |
1525 | ss << "pool size is bigger than the crush rule max size"; | |
1526 | return false; | |
1527 | } | |
1528 | } | |
1529 | } | |
1530 | ||
1531 | return false; | |
1532 | } | |
1533 | ||
1534 | void encode(bufferlist &bl, uint64_t features) const; | |
1535 | void decode(bufferlist::iterator &blp); | |
1536 | void decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp); | |
1537 | void dump(Formatter *f) const; | |
1538 | void dump_rules(Formatter *f) const; | |
1539 | void dump_rule(int ruleset, Formatter *f) const; | |
1540 | void dump_tunables(Formatter *f) const; | |
1541 | void dump_choose_args(Formatter *f) const; | |
1542 | void list_rules(Formatter *f) const; | |
c07f9fc5 FG |
1543 | void list_rules(ostream *ss) const; |
1544 | void dump_tree(ostream *out, | |
1545 | Formatter *f, | |
1546 | const CrushTreeDumper::name_map_t& ws, | |
1547 | bool show_shadow = false) const; | |
1548 | void dump_tree(ostream *out, Formatter *f) { | |
1549 | dump_tree(out, f, CrushTreeDumper::name_map_t()); | |
1550 | } | |
1551 | void dump_tree(Formatter *f, | |
1552 | const CrushTreeDumper::name_map_t& ws) const; | |
7c673cae FG |
1553 | static void generate_test_instances(list<CrushWrapper*>& o); |
1554 | ||
7c673cae FG |
1555 | int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct); |
1556 | ||
1557 | static bool is_valid_crush_name(const string& s); | |
1558 | static bool is_valid_crush_loc(CephContext *cct, | |
1559 | const map<string,string>& loc); | |
1560 | }; | |
1561 | WRITE_CLASS_ENCODER_FEATURES(CrushWrapper) | |
1562 | ||
1563 | #endif |