]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #ifndef CEPH_CRUSH_WRAPPER_H | |
5 | #define CEPH_CRUSH_WRAPPER_H | |
6 | ||
7 | #include <stdlib.h> | |
8 | #include <map> | |
9 | #include <set> | |
10 | #include <string> | |
11 | ||
12 | #include <iosfwd> | |
13 | ||
14 | #include "include/types.h" | |
15 | ||
16 | extern "C" { | |
17 | #include "crush.h" | |
18 | #include "hash.h" | |
19 | #include "mapper.h" | |
20 | #include "builder.h" | |
21 | } | |
22 | ||
31f18b77 | 23 | #include "include/assert.h" |
7c673cae FG |
24 | #include "include/err.h" |
25 | #include "include/encoding.h" | |
26 | ||
27 | ||
28 | #include "common/Mutex.h" | |
29 | ||
7c673cae FG |
30 | #define BUG_ON(x) assert(!(x)) |
31 | ||
32 | namespace ceph { | |
33 | class Formatter; | |
34 | } | |
35 | ||
36 | WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's | |
37 | ||
38 | inline static void encode(const crush_rule_step &s, bufferlist &bl) | |
39 | { | |
40 | ::encode(s.op, bl); | |
41 | ::encode(s.arg1, bl); | |
42 | ::encode(s.arg2, bl); | |
43 | } | |
44 | inline static void decode(crush_rule_step &s, bufferlist::iterator &p) | |
45 | { | |
46 | ::decode(s.op, p); | |
47 | ::decode(s.arg1, p); | |
48 | ::decode(s.arg2, p); | |
49 | } | |
50 | ||
51 | using namespace std; | |
52 | class CrushWrapper { | |
53 | public: | |
54 | std::map<int32_t, string> type_map; /* bucket/device type names */ | |
55 | std::map<int32_t, string> name_map; /* bucket/device names */ | |
56 | std::map<int32_t, string> rule_name_map; | |
57 | std::map<int32_t, int32_t> class_map; /* item id -> class id */ | |
58 | std::map<int32_t, string> class_name; /* class id -> class name */ | |
59 | std::map<string, int32_t> class_rname; /* class name -> class id */ | |
60 | std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */ | |
61 | std::map<uint64_t, crush_choose_arg_map> choose_args; | |
62 | ||
63 | private: | |
64 | struct crush_map *crush; | |
31f18b77 FG |
65 | |
66 | bool have_uniform_rules = false; | |
67 | ||
7c673cae FG |
68 | /* reverse maps */ |
69 | mutable bool have_rmaps; | |
70 | mutable std::map<string, int> type_rmap, name_rmap, rule_name_rmap; | |
71 | void build_rmaps() const { | |
72 | if (have_rmaps) return; | |
73 | build_rmap(type_map, type_rmap); | |
74 | build_rmap(name_map, name_rmap); | |
75 | build_rmap(rule_name_map, rule_name_rmap); | |
76 | have_rmaps = true; | |
77 | } | |
78 | void build_rmap(const map<int, string> &f, std::map<string, int> &r) const { | |
79 | r.clear(); | |
80 | for (std::map<int, string>::const_iterator p = f.begin(); p != f.end(); ++p) | |
81 | r[p->second] = p->first; | |
82 | } | |
83 | ||
84 | public: | |
85 | CrushWrapper(const CrushWrapper& other); | |
86 | const CrushWrapper& operator=(const CrushWrapper& other); | |
87 | ||
88 | CrushWrapper() : crush(0), have_rmaps(false) { | |
89 | create(); | |
90 | } | |
91 | ~CrushWrapper() { | |
92 | if (crush) | |
93 | crush_destroy(crush); | |
94 | choose_args_clear(); | |
95 | } | |
96 | ||
97 | crush_map *get_crush_map() { return crush; } | |
98 | ||
99 | /* building */ | |
100 | void create() { | |
101 | if (crush) | |
102 | crush_destroy(crush); | |
103 | crush = crush_create(); | |
104 | choose_args_clear(); | |
105 | assert(crush); | |
106 | have_rmaps = false; | |
107 | ||
108 | set_tunables_default(); | |
109 | } | |
110 | ||
31f18b77 FG |
111 | /// true if any rule has a ruleset != the rule id |
112 | bool has_legacy_rulesets() const; | |
113 | ||
114 | /// fix rules whose ruleid != ruleset | |
115 | int renumber_rules_by_ruleset(); | |
116 | ||
117 | /// true if any ruleset has more than 1 rule | |
118 | bool has_multirule_rulesets() const; | |
119 | ||
7c673cae FG |
120 | // tunables |
121 | void set_tunables_argonaut() { | |
122 | crush->choose_local_tries = 2; | |
123 | crush->choose_local_fallback_tries = 5; | |
124 | crush->choose_total_tries = 19; | |
125 | crush->chooseleaf_descend_once = 0; | |
126 | crush->chooseleaf_vary_r = 0; | |
127 | crush->chooseleaf_stable = 0; | |
128 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
129 | } | |
130 | void set_tunables_bobtail() { | |
131 | crush->choose_local_tries = 0; | |
132 | crush->choose_local_fallback_tries = 0; | |
133 | crush->choose_total_tries = 50; | |
134 | crush->chooseleaf_descend_once = 1; | |
135 | crush->chooseleaf_vary_r = 0; | |
136 | crush->chooseleaf_stable = 0; | |
137 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
138 | } | |
139 | void set_tunables_firefly() { | |
140 | crush->choose_local_tries = 0; | |
141 | crush->choose_local_fallback_tries = 0; | |
142 | crush->choose_total_tries = 50; | |
143 | crush->chooseleaf_descend_once = 1; | |
144 | crush->chooseleaf_vary_r = 1; | |
145 | crush->chooseleaf_stable = 0; | |
146 | crush->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
147 | } | |
148 | void set_tunables_hammer() { | |
149 | crush->choose_local_tries = 0; | |
150 | crush->choose_local_fallback_tries = 0; | |
151 | crush->choose_total_tries = 50; | |
152 | crush->chooseleaf_descend_once = 1; | |
153 | crush->chooseleaf_vary_r = 1; | |
154 | crush->chooseleaf_stable = 0; | |
155 | crush->allowed_bucket_algs = | |
156 | (1 << CRUSH_BUCKET_UNIFORM) | | |
157 | (1 << CRUSH_BUCKET_LIST) | | |
158 | (1 << CRUSH_BUCKET_STRAW) | | |
159 | (1 << CRUSH_BUCKET_STRAW2); | |
160 | } | |
161 | void set_tunables_jewel() { | |
162 | crush->choose_local_tries = 0; | |
163 | crush->choose_local_fallback_tries = 0; | |
164 | crush->choose_total_tries = 50; | |
165 | crush->chooseleaf_descend_once = 1; | |
166 | crush->chooseleaf_vary_r = 1; | |
167 | crush->chooseleaf_stable = 1; | |
168 | crush->allowed_bucket_algs = | |
169 | (1 << CRUSH_BUCKET_UNIFORM) | | |
170 | (1 << CRUSH_BUCKET_LIST) | | |
171 | (1 << CRUSH_BUCKET_STRAW) | | |
172 | (1 << CRUSH_BUCKET_STRAW2); | |
173 | } | |
174 | ||
175 | void set_tunables_legacy() { | |
176 | set_tunables_argonaut(); | |
177 | crush->straw_calc_version = 0; | |
178 | } | |
179 | void set_tunables_optimal() { | |
180 | set_tunables_jewel(); | |
181 | crush->straw_calc_version = 1; | |
182 | } | |
183 | void set_tunables_default() { | |
31f18b77 | 184 | set_tunables_jewel(); |
7c673cae FG |
185 | crush->straw_calc_version = 1; |
186 | } | |
187 | ||
188 | int get_choose_local_tries() const { | |
189 | return crush->choose_local_tries; | |
190 | } | |
191 | void set_choose_local_tries(int n) { | |
192 | crush->choose_local_tries = n; | |
193 | } | |
194 | ||
195 | int get_choose_local_fallback_tries() const { | |
196 | return crush->choose_local_fallback_tries; | |
197 | } | |
198 | void set_choose_local_fallback_tries(int n) { | |
199 | crush->choose_local_fallback_tries = n; | |
200 | } | |
201 | ||
202 | int get_choose_total_tries() const { | |
203 | return crush->choose_total_tries; | |
204 | } | |
205 | void set_choose_total_tries(int n) { | |
206 | crush->choose_total_tries = n; | |
207 | } | |
208 | ||
209 | int get_chooseleaf_descend_once() const { | |
210 | return crush->chooseleaf_descend_once; | |
211 | } | |
212 | void set_chooseleaf_descend_once(int n) { | |
213 | crush->chooseleaf_descend_once = !!n; | |
214 | } | |
215 | ||
216 | int get_chooseleaf_vary_r() const { | |
217 | return crush->chooseleaf_vary_r; | |
218 | } | |
219 | void set_chooseleaf_vary_r(int n) { | |
220 | crush->chooseleaf_vary_r = n; | |
221 | } | |
222 | ||
223 | int get_chooseleaf_stable() const { | |
224 | return crush->chooseleaf_stable; | |
225 | } | |
226 | void set_chooseleaf_stable(int n) { | |
227 | crush->chooseleaf_stable = n; | |
228 | } | |
229 | ||
230 | int get_straw_calc_version() const { | |
231 | return crush->straw_calc_version; | |
232 | } | |
233 | void set_straw_calc_version(int n) { | |
234 | crush->straw_calc_version = n; | |
235 | } | |
236 | ||
237 | unsigned get_allowed_bucket_algs() const { | |
238 | return crush->allowed_bucket_algs; | |
239 | } | |
240 | void set_allowed_bucket_algs(unsigned n) { | |
241 | crush->allowed_bucket_algs = n; | |
242 | } | |
243 | ||
244 | bool has_argonaut_tunables() const { | |
245 | return | |
246 | crush->choose_local_tries == 2 && | |
247 | crush->choose_local_fallback_tries == 5 && | |
248 | crush->choose_total_tries == 19 && | |
249 | crush->chooseleaf_descend_once == 0 && | |
250 | crush->chooseleaf_vary_r == 0 && | |
251 | crush->chooseleaf_stable == 0 && | |
252 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
253 | } | |
254 | bool has_bobtail_tunables() const { | |
255 | return | |
256 | crush->choose_local_tries == 0 && | |
257 | crush->choose_local_fallback_tries == 0 && | |
258 | crush->choose_total_tries == 50 && | |
259 | crush->chooseleaf_descend_once == 1 && | |
260 | crush->chooseleaf_vary_r == 0 && | |
261 | crush->chooseleaf_stable == 0 && | |
262 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
263 | } | |
264 | bool has_firefly_tunables() const { | |
265 | return | |
266 | crush->choose_local_tries == 0 && | |
267 | crush->choose_local_fallback_tries == 0 && | |
268 | crush->choose_total_tries == 50 && | |
269 | crush->chooseleaf_descend_once == 1 && | |
270 | crush->chooseleaf_vary_r == 1 && | |
271 | crush->chooseleaf_stable == 0 && | |
272 | crush->allowed_bucket_algs == CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
273 | } | |
274 | bool has_hammer_tunables() const { | |
275 | return | |
276 | crush->choose_local_tries == 0 && | |
277 | crush->choose_local_fallback_tries == 0 && | |
278 | crush->choose_total_tries == 50 && | |
279 | crush->chooseleaf_descend_once == 1 && | |
280 | crush->chooseleaf_vary_r == 1 && | |
281 | crush->chooseleaf_stable == 0 && | |
282 | crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) | | |
283 | (1 << CRUSH_BUCKET_LIST) | | |
284 | (1 << CRUSH_BUCKET_STRAW) | | |
285 | (1 << CRUSH_BUCKET_STRAW2)); | |
286 | } | |
287 | bool has_jewel_tunables() const { | |
288 | return | |
289 | crush->choose_local_tries == 0 && | |
290 | crush->choose_local_fallback_tries == 0 && | |
291 | crush->choose_total_tries == 50 && | |
292 | crush->chooseleaf_descend_once == 1 && | |
293 | crush->chooseleaf_vary_r == 1 && | |
294 | crush->chooseleaf_stable == 1 && | |
295 | crush->allowed_bucket_algs == ((1 << CRUSH_BUCKET_UNIFORM) | | |
296 | (1 << CRUSH_BUCKET_LIST) | | |
297 | (1 << CRUSH_BUCKET_STRAW) | | |
298 | (1 << CRUSH_BUCKET_STRAW2)); | |
299 | } | |
300 | ||
301 | bool has_optimal_tunables() const { | |
302 | return has_jewel_tunables(); | |
303 | } | |
304 | bool has_legacy_tunables() const { | |
305 | return has_argonaut_tunables(); | |
306 | } | |
307 | ||
308 | bool has_nondefault_tunables() const { | |
309 | return | |
310 | (crush->choose_local_tries != 2 || | |
311 | crush->choose_local_fallback_tries != 5 || | |
312 | crush->choose_total_tries != 19); | |
313 | } | |
314 | bool has_nondefault_tunables2() const { | |
315 | return | |
316 | crush->chooseleaf_descend_once != 0; | |
317 | } | |
318 | bool has_nondefault_tunables3() const { | |
319 | return | |
320 | crush->chooseleaf_vary_r != 0; | |
321 | } | |
322 | bool has_nondefault_tunables5() const { | |
323 | return | |
324 | crush->chooseleaf_stable != 0; | |
325 | } | |
326 | ||
327 | bool has_v2_rules() const; | |
328 | bool has_v3_rules() const; | |
329 | bool has_v4_buckets() const; | |
330 | bool has_v5_rules() const; | |
31f18b77 FG |
331 | bool has_choose_args() const; // any choose_args |
332 | bool has_incompat_choose_args() const; // choose_args that can't be made compat | |
7c673cae FG |
333 | |
334 | bool is_v2_rule(unsigned ruleid) const; | |
335 | bool is_v3_rule(unsigned ruleid) const; | |
336 | bool is_v5_rule(unsigned ruleid) const; | |
337 | ||
338 | string get_min_required_version() const { | |
339 | if (has_v5_rules() || has_nondefault_tunables5()) | |
340 | return "jewel"; | |
341 | else if (has_v4_buckets()) | |
342 | return "hammer"; | |
343 | else if (has_nondefault_tunables3()) | |
344 | return "firefly"; | |
345 | else if (has_nondefault_tunables2() || has_nondefault_tunables()) | |
346 | return "bobtail"; | |
347 | else | |
348 | return "argonaut"; | |
349 | } | |
350 | ||
351 | // default bucket types | |
352 | unsigned get_default_bucket_alg() const { | |
353 | // in order of preference | |
354 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW2)) | |
355 | return CRUSH_BUCKET_STRAW2; | |
356 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_STRAW)) | |
357 | return CRUSH_BUCKET_STRAW; | |
358 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_TREE)) | |
359 | return CRUSH_BUCKET_TREE; | |
360 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_LIST)) | |
361 | return CRUSH_BUCKET_LIST; | |
362 | if (crush->allowed_bucket_algs & (1 << CRUSH_BUCKET_UNIFORM)) | |
363 | return CRUSH_BUCKET_UNIFORM; | |
364 | return 0; | |
365 | } | |
366 | ||
367 | // bucket types | |
368 | int get_num_type_names() const { | |
369 | return type_map.size(); | |
370 | } | |
31f18b77 FG |
371 | int get_max_type_id() const { |
372 | if (type_map.empty()) | |
373 | return 0; | |
374 | return type_map.rbegin()->first; | |
375 | } | |
7c673cae FG |
376 | int get_type_id(const string& name) const { |
377 | build_rmaps(); | |
378 | if (type_rmap.count(name)) | |
379 | return type_rmap[name]; | |
380 | return -1; | |
381 | } | |
382 | const char *get_type_name(int t) const { | |
383 | std::map<int,string>::const_iterator p = type_map.find(t); | |
384 | if (p != type_map.end()) | |
385 | return p->second.c_str(); | |
386 | return 0; | |
387 | } | |
388 | void set_type_name(int i, const string& name) { | |
389 | type_map[i] = name; | |
390 | if (have_rmaps) | |
391 | type_rmap[name] = i; | |
392 | } | |
393 | ||
394 | // item/bucket names | |
395 | bool name_exists(const string& name) const { | |
396 | build_rmaps(); | |
397 | return name_rmap.count(name); | |
398 | } | |
399 | bool item_exists(int i) const { | |
400 | return name_map.count(i); | |
401 | } | |
402 | int get_item_id(const string& name) const { | |
403 | build_rmaps(); | |
404 | if (name_rmap.count(name)) | |
405 | return name_rmap[name]; | |
406 | return 0; /* hrm */ | |
407 | } | |
408 | const char *get_item_name(int t) const { | |
409 | std::map<int,string>::const_iterator p = name_map.find(t); | |
410 | if (p != name_map.end()) | |
411 | return p->second.c_str(); | |
412 | return 0; | |
413 | } | |
414 | int set_item_name(int i, const string& name) { | |
415 | if (!is_valid_crush_name(name)) | |
416 | return -EINVAL; | |
417 | name_map[i] = name; | |
418 | if (have_rmaps) | |
419 | name_rmap[name] = i; | |
420 | return 0; | |
421 | } | |
31f18b77 FG |
422 | void swap_names(int a, int b) { |
423 | string an = name_map[a]; | |
424 | string bn = name_map[b]; | |
425 | name_map[a] = bn; | |
426 | name_map[b] = an; | |
427 | if (have_rmaps) { | |
428 | name_rmap[an] = b; | |
429 | name_rmap[bn] = a; | |
430 | } | |
431 | } | |
7c673cae FG |
432 | bool id_has_class(int i) { |
433 | int idout; | |
434 | int classout; | |
435 | if (split_id_class(i, &idout, &classout) != 0) | |
436 | return false; | |
437 | return classout != -1; | |
438 | } | |
439 | int split_id_class(int i, int *idout, int *classout) const; | |
440 | ||
441 | bool class_exists(const string& name) const { | |
442 | return class_rname.count(name); | |
443 | } | |
444 | const char *get_class_name(int i) const { | |
224ce89b | 445 | auto p = class_name.find(i); |
7c673cae FG |
446 | if (p != class_name.end()) |
447 | return p->second.c_str(); | |
448 | return 0; | |
449 | } | |
450 | int get_class_id(const string& name) const { | |
224ce89b | 451 | auto p = class_rname.find(name); |
7c673cae FG |
452 | if (p != class_rname.end()) |
453 | return p->second; | |
454 | else | |
455 | return -EINVAL; | |
456 | } | |
457 | int remove_class_name(const string& name) { | |
224ce89b | 458 | auto p = class_rname.find(name); |
7c673cae FG |
459 | if (p == class_rname.end()) |
460 | return -ENOENT; | |
461 | int class_id = p->second; | |
224ce89b | 462 | auto q = class_name.find(class_id); |
7c673cae FG |
463 | if (q == class_name.end()) |
464 | return -ENOENT; | |
465 | class_rname.erase(name); | |
466 | class_name.erase(class_id); | |
467 | return 0; | |
468 | } | |
224ce89b WB |
469 | |
470 | int rename_class(const string& srcname, const string& dstname) { | |
471 | auto p = class_rname.find(srcname); | |
472 | if (p == class_rname.end()) | |
473 | return -ENOENT; | |
474 | int class_id = p->second; | |
475 | auto q = class_name.find(class_id); | |
476 | if (q == class_name.end()) | |
477 | return -ENOENT; | |
478 | class_rname.erase(srcname); | |
479 | class_name.erase(class_id); | |
480 | class_rname[dstname] = class_id; | |
481 | class_name[class_id] = dstname; | |
482 | return 0; | |
483 | } | |
484 | ||
485 | int32_t _alloc_class_id() const; | |
486 | ||
7c673cae FG |
487 | int get_or_create_class_id(const string& name) { |
488 | int c = get_class_id(name); | |
489 | if (c < 0) { | |
224ce89b | 490 | int i = _alloc_class_id(); |
7c673cae FG |
491 | class_name[i] = name; |
492 | class_rname[name] = i; | |
493 | return i; | |
494 | } else { | |
495 | return c; | |
496 | } | |
497 | } | |
498 | ||
499 | const char *get_item_class(int t) const { | |
500 | std::map<int,int>::const_iterator p = class_map.find(t); | |
501 | if (p == class_map.end()) | |
502 | return 0; | |
503 | return get_class_name(p->second); | |
504 | } | |
505 | int set_item_class(int i, const string& name) { | |
506 | if (!is_valid_crush_name(name)) | |
507 | return -EINVAL; | |
508 | class_map[i] = get_or_create_class_id(name); | |
509 | return 0; | |
510 | } | |
511 | int set_item_class(int i, int c) { | |
512 | class_map[i] = c; | |
513 | return c; | |
514 | } | |
224ce89b WB |
515 | void get_devices_by_class(const string &name, set<int> *devices) const { |
516 | assert(devices); | |
517 | devices->clear(); | |
518 | if (!class_exists(name)) { | |
519 | return; | |
520 | } | |
521 | auto cid = get_class_id(name); | |
522 | for (auto& p : class_map) { | |
523 | if (p.first >= 0 && p.second == cid) { | |
524 | devices->insert(p.first); | |
525 | } | |
526 | } | |
527 | } | |
528 | void class_remove_item(int i) { | |
529 | auto it = class_map.find(i); | |
530 | if (it == class_map.end()) { | |
531 | return; | |
532 | } | |
533 | class_map.erase(it); | |
534 | } | |
7c673cae FG |
535 | int can_rename_item(const string& srcname, |
536 | const string& dstname, | |
537 | ostream *ss) const; | |
538 | int rename_item(const string& srcname, | |
539 | const string& dstname, | |
540 | ostream *ss); | |
541 | int can_rename_bucket(const string& srcname, | |
542 | const string& dstname, | |
543 | ostream *ss) const; | |
544 | int rename_bucket(const string& srcname, | |
545 | const string& dstname, | |
546 | ostream *ss); | |
547 | ||
548 | // rule names | |
549 | bool rule_exists(string name) const { | |
550 | build_rmaps(); | |
551 | return rule_name_rmap.count(name); | |
552 | } | |
553 | int get_rule_id(string name) const { | |
554 | build_rmaps(); | |
555 | if (rule_name_rmap.count(name)) | |
556 | return rule_name_rmap[name]; | |
557 | return -ENOENT; | |
558 | } | |
559 | const char *get_rule_name(int t) const { | |
560 | std::map<int,string>::const_iterator p = rule_name_map.find(t); | |
561 | if (p != rule_name_map.end()) | |
562 | return p->second.c_str(); | |
563 | return 0; | |
564 | } | |
565 | void set_rule_name(int i, const string& name) { | |
566 | rule_name_map[i] = name; | |
567 | if (have_rmaps) | |
568 | rule_name_rmap[name] = i; | |
569 | } | |
570 | ||
571 | ||
572 | /** | |
573 | * find tree nodes referenced by rules by a 'take' command | |
574 | * | |
575 | * Note that these may not be parentless roots. | |
576 | */ | |
577 | void find_takes(set<int>& roots) const; | |
578 | ||
579 | /** | |
580 | * find tree roots | |
581 | * | |
582 | * These are parentless nodes in the map. | |
583 | */ | |
584 | void find_roots(set<int>& roots) const; | |
585 | ||
224ce89b WB |
586 | /** |
587 | * find tree roots that are not shadow (device class) items | |
588 | * | |
589 | * These are parentless nodes in the map that are not shadow | |
590 | * items for device classes. | |
591 | */ | |
592 | void find_nonshadow_roots(set<int>& roots) const; | |
593 | ||
7c673cae FG |
594 | /** |
595 | * see if an item is contained within a subtree | |
596 | * | |
597 | * @param root haystack | |
598 | * @param item needle | |
599 | * @return true if the item is located beneath the given node | |
600 | */ | |
601 | bool subtree_contains(int root, int item) const; | |
602 | ||
603 | private: | |
604 | /** | |
605 | * search for an item in any bucket | |
606 | * | |
607 | * @param i item | |
608 | * @return true if present | |
609 | */ | |
610 | bool _search_item_exists(int i) const; | |
611 | public: | |
612 | ||
613 | /** | |
614 | * see if item is located where we think it is | |
615 | * | |
616 | * This verifies that the given item is located at a particular | |
617 | * location in the hierarchy. However, that check is imprecise; we | |
618 | * are actually verifying that the most specific location key/value | |
619 | * is correct. For example, if loc specifies that rack=foo and | |
620 | * host=bar, it will verify that host=bar is correct; any placement | |
621 | * above that level in the hierarchy is ignored. This matches the | |
622 | * semantics for insert_item(). | |
623 | * | |
624 | * @param cct cct | |
625 | * @param item item id | |
626 | * @param loc location to check (map of type to bucket names) | |
627 | * @param weight optional pointer to weight of item at that location | |
628 | * @return true if item is at specified location | |
629 | */ | |
630 | bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, int *iweight); | |
631 | bool check_item_loc(CephContext *cct, int item, const map<string,string>& loc, float *weight) { | |
632 | int iweight; | |
633 | bool ret = check_item_loc(cct, item, loc, &iweight); | |
634 | if (weight) | |
635 | *weight = (float)iweight / (float)0x10000; | |
636 | return ret; | |
637 | } | |
638 | ||
639 | ||
640 | /** | |
641 | * returns the (type, name) of the parent bucket of id | |
642 | * | |
643 | * FIXME: ambiguous for items that occur multiple times in the map | |
644 | */ | |
645 | pair<string,string> get_immediate_parent(int id, int *ret = NULL); | |
646 | int get_immediate_parent_id(int id, int *parent) const; | |
647 | ||
31f18b77 FG |
648 | /** |
649 | * return ancestor of the given type, or 0 if none | |
650 | * (parent is always a bucket and thus <0) | |
651 | */ | |
652 | int get_parent_of_type(int id, int type) const; | |
653 | ||
7c673cae FG |
654 | /** |
655 | * get the fully qualified location of a device by successively finding | |
656 | * parents beginning at ID and ending at highest type number specified in | |
657 | * the CRUSH map which assumes that if device foo is under device bar, the | |
658 | * type_id of foo < bar where type_id is the integer specified in the CRUSH map | |
659 | * | |
660 | * returns the location in the form of (type=foo) where type is a type of bucket | |
661 | * specified in the CRUSH map and foo is a name specified in the CRUSH map | |
662 | */ | |
663 | map<string, string> get_full_location(int id); | |
664 | ||
665 | /* | |
666 | * identical to get_full_location(int id) although it returns the type/name | |
667 | * pairs in the order they occur in the hierarchy. | |
668 | * | |
669 | * returns -ENOENT if id is not found. | |
670 | */ | |
671 | int get_full_location_ordered(int id, vector<pair<string, string> >& path); | |
672 | ||
31f18b77 FG |
673 | /* |
674 | * identical to get_full_location_ordered(int id, vector<pair<string, string> >& path), | |
675 | * although it returns a concatenated string with the type/name pairs in descending | |
676 | * hierarchical order with format key1=val1,key2=val2. | |
677 | * | |
678 | * returns the location in descending hierarchy as a string. | |
679 | */ | |
680 | string get_full_location_ordered_string(int id); | |
681 | ||
7c673cae FG |
682 | /** |
683 | * returns (type_id, type) of all parent buckets between id and | |
684 | * default, can be used to check for anomalous CRUSH maps | |
685 | */ | |
686 | map<int, string> get_parent_hierarchy(int id); | |
687 | ||
688 | /** | |
689 | * enumerate immediate children of given node | |
690 | * | |
691 | * @param id parent bucket or device id | |
692 | * @return number of items, or error | |
693 | */ | |
694 | int get_children(int id, list<int> *children); | |
695 | ||
31f18b77 FG |
696 | /** |
697 | * enumerate leaves(devices) of given node | |
698 | * | |
699 | * @param name parent bucket name | |
700 | * @return 0 on success or a negative errno on error. | |
701 | */ | |
702 | int get_leaves(const string &name, set<int> *leaves); | |
703 | int _get_leaves(int id, list<int> *leaves); // worker | |
704 | ||
7c673cae FG |
705 | /** |
706 | * insert an item into the map at a specific position | |
707 | * | |
708 | * Add an item at a specific location in the hierarchy. | |
709 | * Specifically, we look for the most specific location constraint | |
710 | * for which a bucket already exists, and then create intervening | |
711 | * buckets beneath that in order to place the item. | |
712 | * | |
713 | * Note that any location specifiers *above* the most specific match | |
714 | * are ignored. For example, if we specify that osd.12 goes in | |
715 | * host=foo, rack=bar, and row=baz, and rack=bar is the most | |
716 | * specific match, we will create host=foo beneath that point and | |
717 | * put osd.12 inside it. However, we will not verify that rack=bar | |
718 | * is beneath row=baz or move it. | |
719 | * | |
720 | * In short, we will build out a hierarchy, and move leaves around, | |
721 | * but not adjust the hierarchy's internal structure. Yet. | |
722 | * | |
723 | * If the item is already present in the map, we will return EEXIST. | |
724 | * If the location key/value pairs are nonsensical | |
725 | * (rack=nameofdevice), or location specifiers that do not attach us | |
726 | * to any existing part of the hierarchy, we will return EINVAL. | |
727 | * | |
728 | * @param cct cct | |
729 | * @param id item id | |
730 | * @param weight item weight | |
731 | * @param name item name | |
732 | * @param loc location (map of type to bucket names) | |
733 | * @return 0 for success, negative on error | |
734 | */ | |
735 | int insert_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc); | |
736 | ||
737 | /** | |
738 | * move a bucket in the hierarchy to the given location | |
739 | * | |
740 | * This has the same location and ancestor creation behavior as | |
741 | * insert_item(), but will relocate the specified existing bucket. | |
742 | * | |
743 | * @param cct cct | |
744 | * @param id bucket id | |
745 | * @param loc location (map of type to bucket names) | |
746 | * @return 0 for success, negative on error | |
747 | */ | |
748 | int move_bucket(CephContext *cct, int id, const map<string,string>& loc); | |
749 | ||
31f18b77 FG |
750 | /** |
751 | * swap bucket contents of two buckets without touching bucket ids | |
752 | * | |
753 | * @param cct cct | |
754 | * @param src bucket a | |
755 | * @param dst bucket b | |
756 | * @return 0 for success, negative on error | |
757 | */ | |
758 | int swap_bucket(CephContext *cct, int src, int dst); | |
759 | ||
7c673cae FG |
760 | /** |
761 | * add a link to an existing bucket in the hierarchy to the new location | |
762 | * | |
763 | * This has the same location and ancestor creation behavior as | |
764 | * insert_item(), but will add a new link to the specified existing | |
765 | * bucket. | |
766 | * | |
767 | * @param cct cct | |
768 | * @param id bucket id | |
769 | * @param loc location (map of type to bucket names) | |
770 | * @return 0 for success, negative on error | |
771 | */ | |
772 | int link_bucket(CephContext *cct, int id, const map<string,string>& loc); | |
773 | ||
774 | /** | |
775 | * add or update an item's position in the map | |
776 | * | |
777 | * This is analogous to insert_item, except we will move an item if | |
778 | * it is already present. | |
779 | * | |
780 | * @param cct cct | |
781 | * @param id item id | |
782 | * @param weight item weight | |
783 | * @param name item name | |
784 | * @param loc location (map of type to bucket names) | |
785 | * @return 0 for no change, 1 for successful change, negative on error | |
786 | */ | |
787 | int update_item(CephContext *cct, int id, float weight, string name, const map<string,string>& loc); | |
788 | ||
789 | /** | |
790 | * create or move an item, but do not adjust its weight if it already exists | |
791 | * | |
792 | * @param cct cct | |
793 | * @param item item id | |
794 | * @param weight initial item weight (if we need to create it) | |
795 | * @param name item name | |
796 | * @param loc location (map of type to bucket names) | |
797 | * @return 0 for no change, 1 for successful change, negative on error | |
798 | */ | |
799 | int create_or_move_item(CephContext *cct, int item, float weight, string name, | |
800 | const map<string,string>& loc); | |
801 | ||
802 | /** | |
803 | * remove all instances of an item from the map | |
804 | * | |
805 | * @param cct cct | |
806 | * @param id item id to remove | |
807 | * @param unlink_only unlink but do not remove bucket (useful if multiple links or not empty) | |
808 | * @return 0 on success, negative on error | |
809 | */ | |
810 | int remove_item(CephContext *cct, int id, bool unlink_only); | |
811 | ||
812 | /** | |
813 | * recursively remove buckets starting at item and stop removing | |
814 | * when a bucket is in use. | |
815 | * | |
816 | * @param item id to remove | |
817 | * @param unused true if only unused items should be removed | |
818 | * @return 0 on success, negative on error | |
819 | */ | |
820 | int remove_root(int item, bool unused); | |
821 | ||
822 | /** | |
823 | * remove all instances of an item nested beneath a certain point from the map | |
824 | * | |
825 | * @param cct cct | |
826 | * @param id item id to remove | |
827 | * @param ancestor ancestor item id under which to search for id | |
828 | * @param unlink_only unlink but do not remove bucket (useful if bucket has multiple links or is not empty) | |
829 | * @return 0 on success, negative on error | |
830 | */ | |
831 | private: | |
832 | bool _maybe_remove_last_instance(CephContext *cct, int id, bool unlink_only); | |
833 | int _remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only); | |
834 | bool _bucket_is_in_use(int id); | |
835 | public: | |
836 | int remove_item_under(CephContext *cct, int id, int ancestor, bool unlink_only); | |
837 | ||
838 | /** | |
839 | * calculate the locality/distance from a given id to a crush location map | |
840 | * | |
841 | * Specifically, we look for the lowest-valued type for which the | |
842 | * location of id matches that described in loc. | |
843 | * | |
844 | * @param cct cct | |
845 | * @param id the existing id in the map | |
846 | * @param loc a set of key=value pairs describing a location in the hierarchy | |
847 | */ | |
848 | int get_common_ancestor_distance(CephContext *cct, int id, | |
849 | const std::multimap<string,string>& loc); | |
850 | ||
851 | /** | |
852 | * parse a set of key/value pairs out of a string vector | |
853 | * | |
854 | * These are used to describe a location in the CRUSH hierarchy. | |
855 | * | |
856 | * @param args list of strings (each key= or key=value) | |
857 | * @param ploc pointer to a resulting location map or multimap | |
858 | */ | |
859 | static int parse_loc_map(const std::vector<string>& args, | |
860 | std::map<string,string> *ploc); | |
861 | static int parse_loc_multimap(const std::vector<string>& args, | |
862 | std::multimap<string,string> *ploc); | |
863 | ||
864 | /** | |
865 | * get an item's weight | |
866 | * | |
867 | * Will return the weight for the first instance it finds. | |
868 | * | |
869 | * @param id item id to check | |
870 | * @return weight of item | |
871 | */ | |
872 | int get_item_weight(int id) const; | |
873 | float get_item_weightf(int id) const { | |
874 | return (float)get_item_weight(id) / (float)0x10000; | |
875 | } | |
876 | int get_item_weight_in_loc(int id, const map<string,string> &loc); | |
877 | float get_item_weightf_in_loc(int id, const map<string,string> &loc) { | |
878 | return (float)get_item_weight_in_loc(id, loc) / (float)0x10000; | |
879 | } | |
880 | ||
224ce89b WB |
/**
 * check that a float weight can be converted to a fixed-point int weight
 *
 * The weight is scaled by 0x10000 and, once truncated, must fit in a
 * non-negative int.  The range test is done in floating point: converting
 * a negative, NaN or very large float to an unsigned integer before
 * comparing is undefined behaviour.
 *
 * @param weight weight to validate
 * @return 0 if the weight is usable, -EOVERFLOW otherwise
 */
int validate_weightf(float weight) {
  const float scaled = weight * 0x10000;
  // -INT_MIN equals INT_MAX + 1 and, being a power of two, is exact as a float
  const float limit = -static_cast<float>(std::numeric_limits<int>::min());
  // phrased so that NaN fails; values in (-1, 0) truncate to 0 and are accepted
  if (!(scaled > -1.0f && scaled < limit)) {
    return -EOVERFLOW;
  }
  return 0;
}
7c673cae FG |
888 | int adjust_item_weight(CephContext *cct, int id, int weight); |
889 | int adjust_item_weightf(CephContext *cct, int id, float weight) { | |
224ce89b WB |
890 | int r = validate_weightf(weight); |
891 | if (r < 0) { | |
892 | return r; | |
893 | } | |
7c673cae FG |
894 | return adjust_item_weight(cct, id, (int)(weight * (float)0x10000)); |
895 | } | |
896 | int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc); | |
897 | int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map<string,string>& loc) { | |
224ce89b WB |
898 | int r = validate_weightf(weight); |
899 | if (r < 0) { | |
900 | return r; | |
901 | } | |
7c673cae FG |
902 | return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc); |
903 | } | |
904 | void reweight(CephContext *cct); | |
905 | ||
906 | int adjust_subtree_weight(CephContext *cct, int id, int weight); | |
907 | int adjust_subtree_weightf(CephContext *cct, int id, float weight) { | |
224ce89b WB |
908 | int r = validate_weightf(weight); |
909 | if (r < 0) { | |
910 | return r; | |
911 | } | |
7c673cae FG |
912 | return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000)); |
913 | } | |
914 | ||
915 | /// check if item id is present in the map hierarchy | |
916 | bool check_item_present(int id) const; | |
917 | ||
918 | ||
919 | /*** devices ***/ | |
920 | int get_max_devices() const { | |
921 | if (!crush) return 0; | |
922 | return crush->max_devices; | |
923 | } | |
924 | ||
925 | ||
926 | /*** rules ***/ | |
927 | private: | |
928 | crush_rule *get_rule(unsigned ruleno) const { | |
929 | if (!crush) return (crush_rule *)(-ENOENT); | |
930 | if (ruleno >= crush->max_rules) | |
931 | return 0; | |
932 | return crush->rules[ruleno]; | |
933 | } | |
934 | crush_rule_step *get_rule_step(unsigned ruleno, unsigned step) const { | |
935 | crush_rule *n = get_rule(ruleno); | |
936 | if (IS_ERR(n)) return (crush_rule_step *)(-EINVAL); | |
937 | if (step >= n->len) return (crush_rule_step *)(-EINVAL); | |
938 | return &n->steps[step]; | |
939 | } | |
940 | ||
941 | public: | |
942 | /* accessors */ | |
943 | int get_max_rules() const { | |
944 | if (!crush) return 0; | |
945 | return crush->max_rules; | |
946 | } | |
947 | bool rule_exists(unsigned ruleno) const { | |
948 | if (!crush) return false; | |
949 | if (ruleno < crush->max_rules && | |
950 | crush->rules[ruleno] != NULL) | |
951 | return true; | |
952 | return false; | |
953 | } | |
954 | int get_rule_len(unsigned ruleno) const { | |
955 | crush_rule *r = get_rule(ruleno); | |
956 | if (IS_ERR(r)) return PTR_ERR(r); | |
957 | return r->len; | |
958 | } | |
959 | int get_rule_mask_ruleset(unsigned ruleno) const { | |
960 | crush_rule *r = get_rule(ruleno); | |
961 | if (IS_ERR(r)) return -1; | |
962 | return r->mask.ruleset; | |
963 | } | |
964 | int get_rule_mask_type(unsigned ruleno) const { | |
965 | crush_rule *r = get_rule(ruleno); | |
966 | if (IS_ERR(r)) return -1; | |
967 | return r->mask.type; | |
968 | } | |
969 | int get_rule_mask_min_size(unsigned ruleno) const { | |
970 | crush_rule *r = get_rule(ruleno); | |
971 | if (IS_ERR(r)) return -1; | |
972 | return r->mask.min_size; | |
973 | } | |
974 | int get_rule_mask_max_size(unsigned ruleno) const { | |
975 | crush_rule *r = get_rule(ruleno); | |
976 | if (IS_ERR(r)) return -1; | |
977 | return r->mask.max_size; | |
978 | } | |
979 | int get_rule_op(unsigned ruleno, unsigned step) const { | |
980 | crush_rule_step *s = get_rule_step(ruleno, step); | |
981 | if (IS_ERR(s)) return PTR_ERR(s); | |
982 | return s->op; | |
983 | } | |
984 | int get_rule_arg1(unsigned ruleno, unsigned step) const { | |
985 | crush_rule_step *s = get_rule_step(ruleno, step); | |
986 | if (IS_ERR(s)) return PTR_ERR(s); | |
987 | return s->arg1; | |
988 | } | |
989 | int get_rule_arg2(unsigned ruleno, unsigned step) const { | |
990 | crush_rule_step *s = get_rule_step(ruleno, step); | |
991 | if (IS_ERR(s)) return PTR_ERR(s); | |
992 | return s->arg2; | |
993 | } | |
994 | ||
995 | /** | |
996 | * calculate a map of osds to weights for a given rule | |
997 | * | |
998 | * Generate a map of which OSDs get how much relative weight for a | |
999 | * given rule. | |
1000 | * | |
1001 | * @param ruleno [in] rule id | |
1002 | * @param pmap [out] map of osd to weight | |
1003 | * @return 0 for success, or negative error code | |
1004 | */ | |
1005 | int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap); | |
1006 | ||
1007 | /* modifiers */ | |
1008 | int add_rule(int len, int ruleset, int type, int minsize, int maxsize, int ruleno) { | |
1009 | if (!crush) return -ENOENT; | |
1010 | crush_rule *n = crush_make_rule(len, ruleset, type, minsize, maxsize); | |
1011 | assert(n); | |
1012 | ruleno = crush_add_rule(crush, n, ruleno); | |
1013 | return ruleno; | |
1014 | } | |
1015 | int set_rule_mask_max_size(unsigned ruleno, int max_size) { | |
1016 | crush_rule *r = get_rule(ruleno); | |
1017 | if (IS_ERR(r)) return -1; | |
1018 | return r->mask.max_size = max_size; | |
1019 | } | |
1020 | int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) { | |
1021 | if (!crush) return -ENOENT; | |
1022 | crush_rule *n = get_rule(ruleno); | |
1023 | if (!n) return -1; | |
1024 | crush_rule_set_step(n, step, op, arg1, arg2); | |
1025 | return 0; | |
1026 | } | |
1027 | int set_rule_step_take(unsigned ruleno, unsigned step, int val) { | |
1028 | return set_rule_step(ruleno, step, CRUSH_RULE_TAKE, val, 0); | |
1029 | } | |
1030 | int set_rule_step_set_choose_tries(unsigned ruleno, unsigned step, int val) { | |
1031 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_TRIES, val, 0); | |
1032 | } | |
1033 | int set_rule_step_set_choose_local_tries(unsigned ruleno, unsigned step, int val) { | |
1034 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES, val, 0); | |
1035 | } | |
1036 | int set_rule_step_set_choose_local_fallback_tries(unsigned ruleno, unsigned step, int val) { | |
1037 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES, val, 0); | |
1038 | } | |
1039 | int set_rule_step_set_chooseleaf_tries(unsigned ruleno, unsigned step, int val) { | |
1040 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_TRIES, val, 0); | |
1041 | } | |
1042 | int set_rule_step_set_chooseleaf_vary_r(unsigned ruleno, unsigned step, int val) { | |
1043 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_VARY_R, val, 0); | |
1044 | } | |
1045 | int set_rule_step_set_chooseleaf_stable(unsigned ruleno, unsigned step, int val) { | |
1046 | return set_rule_step(ruleno, step, CRUSH_RULE_SET_CHOOSELEAF_STABLE, val, 0); | |
1047 | } | |
1048 | int set_rule_step_choose_firstn(unsigned ruleno, unsigned step, int val, int type) { | |
1049 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_FIRSTN, val, type); | |
1050 | } | |
1051 | int set_rule_step_choose_indep(unsigned ruleno, unsigned step, int val, int type) { | |
1052 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSE_INDEP, val, type); | |
1053 | } | |
1054 | int set_rule_step_choose_leaf_firstn(unsigned ruleno, unsigned step, int val, int type) { | |
1055 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_FIRSTN, val, type); | |
1056 | } | |
1057 | int set_rule_step_choose_leaf_indep(unsigned ruleno, unsigned step, int val, int type) { | |
1058 | return set_rule_step(ruleno, step, CRUSH_RULE_CHOOSELEAF_INDEP, val, type); | |
1059 | } | |
1060 | int set_rule_step_emit(unsigned ruleno, unsigned step) { | |
1061 | return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0); | |
1062 | } | |
1063 | ||
31f18b77 FG |
1064 | int add_simple_rule( |
1065 | string name, string root_name, string failure_domain_type, | |
224ce89b | 1066 | string device_class, |
31f18b77 FG |
1067 | string mode, int rule_type, ostream *err = 0); |
1068 | ||
7c673cae | 1069 | /** |
31f18b77 | 1070 | * @param rno rule[set] id to use, -1 to pick the lowest available |
7c673cae | 1071 | */ |
31f18b77 FG |
1072 | int add_simple_rule_at( |
1073 | string name, string root_name, | |
224ce89b | 1074 | string failure_domain_type, string device_class, string mode, |
31f18b77 | 1075 | int rule_type, int rno, ostream *err = 0); |
7c673cae FG |
1076 | |
1077 | int remove_rule(int ruleno); | |
1078 | ||
1079 | ||
1080 | /** buckets **/ | |
1081 | private: | |
1082 | const crush_bucket *get_bucket(int id) const { | |
1083 | if (!crush) | |
1084 | return (crush_bucket *)(-EINVAL); | |
1085 | unsigned int pos = (unsigned int)(-1 - id); | |
1086 | unsigned int max_buckets = crush->max_buckets; | |
1087 | if (pos >= max_buckets) | |
1088 | return (crush_bucket *)(-ENOENT); | |
1089 | crush_bucket *ret = crush->buckets[pos]; | |
1090 | if (ret == NULL) | |
1091 | return (crush_bucket *)(-ENOENT); | |
1092 | return ret; | |
1093 | } | |
1094 | crush_bucket *get_bucket(int id) { | |
1095 | if (!crush) | |
1096 | return (crush_bucket *)(-EINVAL); | |
1097 | unsigned int pos = (unsigned int)(-1 - id); | |
1098 | unsigned int max_buckets = crush->max_buckets; | |
1099 | if (pos >= max_buckets) | |
1100 | return (crush_bucket *)(-ENOENT); | |
1101 | crush_bucket *ret = crush->buckets[pos]; | |
1102 | if (ret == NULL) | |
1103 | return (crush_bucket *)(-ENOENT); | |
1104 | return ret; | |
1105 | } | |
1106 | /** | |
1107 | * detach a bucket from its parent and adjust the parent weight | |
1108 | * | |
1109 | * returns the weight of the detached bucket | |
1110 | **/ | |
1111 | int detach_bucket(CephContext *cct, int item){ | |
1112 | if (!crush) | |
1113 | return (-EINVAL); | |
1114 | ||
1115 | if (item >= 0) | |
1116 | return (-EINVAL); | |
1117 | ||
1118 | // check that the bucket that we want to detach exists | |
1119 | assert(bucket_exists(item)); | |
1120 | ||
1121 | // get the bucket's weight | |
1122 | crush_bucket *b = get_bucket(item); | |
1123 | unsigned bucket_weight = b->weight; | |
1124 | ||
1125 | // get where the bucket is located | |
1126 | pair<string, string> bucket_location = get_immediate_parent(item); | |
1127 | ||
1128 | // get the id of the parent bucket | |
1129 | int parent_id = get_item_id(bucket_location.second); | |
1130 | ||
1131 | // get the parent bucket | |
1132 | crush_bucket *parent_bucket = get_bucket(parent_id); | |
1133 | ||
1134 | if (!IS_ERR(parent_bucket)) { | |
1135 | // zero out the bucket weight | |
31f18b77 | 1136 | bucket_adjust_item_weight(cct, parent_bucket, item, 0); |
7c673cae FG |
1137 | adjust_item_weight(cct, parent_bucket->id, parent_bucket->weight); |
1138 | ||
1139 | // remove the bucket from the parent | |
31f18b77 | 1140 | bucket_remove_item(parent_bucket, item); |
7c673cae FG |
1141 | } else if (PTR_ERR(parent_bucket) != -ENOENT) { |
1142 | return PTR_ERR(parent_bucket); | |
1143 | } | |
1144 | ||
1145 | // check that we're happy | |
1146 | int test_weight = 0; | |
1147 | map<string,string> test_location; | |
1148 | test_location[ bucket_location.first ] = (bucket_location.second); | |
1149 | ||
1150 | bool successful_detach = !(check_item_loc(cct, item, test_location, &test_weight)); | |
1151 | assert(successful_detach); | |
1152 | assert(test_weight == 0); | |
1153 | ||
1154 | return bucket_weight; | |
1155 | } | |
1156 | ||
1157 | public: | |
1158 | int get_max_buckets() const { | |
1159 | if (!crush) return -EINVAL; | |
1160 | return crush->max_buckets; | |
1161 | } | |
1162 | int get_next_bucket_id() const { | |
1163 | if (!crush) return -EINVAL; | |
1164 | return crush_get_next_bucket_id(crush); | |
1165 | } | |
1166 | bool bucket_exists(int id) const { | |
1167 | const crush_bucket *b = get_bucket(id); | |
1168 | if (IS_ERR(b)) | |
1169 | return false; | |
1170 | return true; | |
1171 | } | |
1172 | int get_bucket_weight(int id) const { | |
1173 | const crush_bucket *b = get_bucket(id); | |
1174 | if (IS_ERR(b)) return PTR_ERR(b); | |
1175 | return b->weight; | |
1176 | } | |
1177 | float get_bucket_weightf(int id) const { | |
1178 | const crush_bucket *b = get_bucket(id); | |
1179 | if (IS_ERR(b)) return 0; | |
1180 | return b->weight / (float)0x10000; | |
1181 | } | |
1182 | int get_bucket_type(int id) const { | |
1183 | const crush_bucket *b = get_bucket(id); | |
1184 | if (IS_ERR(b)) return PTR_ERR(b); | |
1185 | return b->type; | |
1186 | } | |
1187 | int get_bucket_alg(int id) const { | |
1188 | const crush_bucket *b = get_bucket(id); | |
1189 | if (IS_ERR(b)) return PTR_ERR(b); | |
1190 | return b->alg; | |
1191 | } | |
1192 | int get_bucket_hash(int id) const { | |
1193 | const crush_bucket *b = get_bucket(id); | |
1194 | if (IS_ERR(b)) return PTR_ERR(b); | |
1195 | return b->hash; | |
1196 | } | |
1197 | int get_bucket_size(int id) const { | |
1198 | const crush_bucket *b = get_bucket(id); | |
1199 | if (IS_ERR(b)) return PTR_ERR(b); | |
1200 | return b->size; | |
1201 | } | |
1202 | int get_bucket_item(int id, int pos) const { | |
1203 | const crush_bucket *b = get_bucket(id); | |
1204 | if (IS_ERR(b)) return PTR_ERR(b); | |
1205 | if ((__u32)pos >= b->size) | |
1206 | return PTR_ERR(b); | |
1207 | return b->items[pos]; | |
1208 | } | |
1209 | int get_bucket_item_weight(int id, int pos) const { | |
1210 | const crush_bucket *b = get_bucket(id); | |
1211 | if (IS_ERR(b)) return PTR_ERR(b); | |
1212 | return crush_get_bucket_item_weight(b, pos); | |
1213 | } | |
1214 | float get_bucket_item_weightf(int id, int pos) const { | |
1215 | const crush_bucket *b = get_bucket(id); | |
1216 | if (IS_ERR(b)) return 0; | |
1217 | return (float)crush_get_bucket_item_weight(b, pos) / (float)0x10000; | |
1218 | } | |
1219 | ||
1220 | /* modifiers */ | |
1221 | int add_bucket(int bucketno, int alg, int hash, int type, int size, | |
1222 | int *items, int *weights, int *idout) { | |
1223 | if (alg == 0) { | |
1224 | alg = get_default_bucket_alg(); | |
1225 | if (alg == 0) | |
1226 | return -EINVAL; | |
1227 | } | |
1228 | crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items, weights); | |
1229 | assert(b); | |
1230 | return crush_add_bucket(crush, bucketno, b, idout); | |
1231 | } | |
31f18b77 FG |
1232 | |
1233 | int bucket_add_item(crush_bucket *bucket, int item, int weight); | |
1234 | int bucket_remove_item(struct crush_bucket *bucket, int item); | |
1235 | int bucket_adjust_item_weight(CephContext *cct, struct crush_bucket *bucket, int item, int weight); | |
1236 | ||
7c673cae FG |
1237 | void finalize() { |
1238 | assert(crush); | |
1239 | crush_finalize(crush); | |
31f18b77 | 1240 | have_uniform_rules = !has_legacy_rulesets(); |
7c673cae FG |
1241 | } |
1242 | ||
224ce89b | 1243 | int update_device_class(int id, const string& class_name, const string& name, ostream *ss); |
7c673cae FG |
1244 | int device_class_clone(int original, int device_class, int *clone); |
1245 | bool class_is_in_use(int class_id); | |
1246 | int populate_classes(); | |
1247 | int rebuild_roots_with_classes(); | |
1248 | /* remove unused roots generated for class devices */ | |
1249 | int trim_roots_with_class(bool unused); | |
1250 | int cleanup_classes(); | |
1251 | ||
1252 | void start_choose_profile() { | |
1253 | free(crush->choose_tries); | |
1254 | /* | |
1255 | * the original choose_total_tries value was off by one (it | |
1256 | * counted "retries" and not "tries"). add one to alloc. | |
1257 | */ | |
1258 | crush->choose_tries = (__u32 *)malloc(sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1)); | |
1259 | memset(crush->choose_tries, 0, | |
1260 | sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1)); | |
1261 | } | |
1262 | void stop_choose_profile() { | |
1263 | free(crush->choose_tries); | |
1264 | crush->choose_tries = 0; | |
1265 | } | |
1266 | ||
1267 | int get_choose_profile(__u32 **vec) { | |
1268 | if (crush->choose_tries) { | |
1269 | *vec = crush->choose_tries; | |
1270 | return crush->choose_total_tries; | |
1271 | } | |
1272 | return 0; | |
1273 | } | |
1274 | ||
1275 | ||
1276 | void set_max_devices(int m) { | |
1277 | crush->max_devices = m; | |
1278 | } | |
1279 | ||
1280 | int find_rule(int ruleset, int type, int size) const { | |
1281 | if (!crush) return -1; | |
31f18b77 FG |
1282 | if (!have_uniform_rules) { |
1283 | return crush_find_rule(crush, ruleset, type, size); | |
1284 | } else { | |
1285 | if (ruleset < (int)crush->max_rules && | |
1286 | crush->rules[ruleset]) | |
1287 | return ruleset; | |
1288 | return -1; | |
1289 | } | |
7c673cae FG |
1290 | } |
1291 | ||
1292 | bool ruleset_exists(int const ruleset) const { | |
1293 | for (size_t i = 0; i < crush->max_rules; ++i) { | |
1294 | if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) { | |
1295 | return true; | |
1296 | } | |
1297 | } | |
1298 | ||
1299 | return false; | |
1300 | } | |
1301 | ||
1302 | /** | |
1303 | * Return the lowest numbered ruleset of type `type` | |
1304 | * | |
1305 | * @returns a ruleset ID, or -1 if no matching rulesets found. | |
1306 | */ | |
1307 | int find_first_ruleset(int type) const { | |
1308 | int result = -1; | |
1309 | ||
1310 | for (size_t i = 0; i < crush->max_rules; ++i) { | |
1311 | if (crush->rules[i] | |
1312 | && crush->rules[i]->mask.type == type | |
1313 | && (crush->rules[i]->mask.ruleset < result || result == -1)) { | |
1314 | result = crush->rules[i]->mask.ruleset; | |
1315 | } | |
1316 | } | |
1317 | ||
1318 | return result; | |
1319 | } | |
1320 | ||
1321 | crush_choose_arg_map choose_args_get(uint64_t choose_args_index) const { | |
1322 | auto i = choose_args.find(choose_args_index); | |
1323 | if (i == choose_args.end()) { | |
1324 | crush_choose_arg_map arg_map; | |
1325 | arg_map.args = NULL; | |
1326 | arg_map.size = 0; | |
1327 | return arg_map; | |
1328 | } else { | |
1329 | return i->second; | |
1330 | } | |
1331 | } | |
1332 | ||
1333 | void destroy_choose_args(crush_choose_arg_map arg_map) { | |
1334 | for (__u32 i = 0; i < arg_map.size; i++) { | |
1335 | crush_choose_arg *arg = &arg_map.args[i]; | |
1336 | for (__u32 j = 0; j < arg->weight_set_size; j++) { | |
1337 | crush_weight_set *weight_set = &arg->weight_set[j]; | |
1338 | free(weight_set->weights); | |
1339 | } | |
1340 | if (arg->weight_set) | |
1341 | free(arg->weight_set); | |
1342 | if (arg->ids) | |
1343 | free(arg->ids); | |
1344 | } | |
1345 | free(arg_map.args); | |
1346 | } | |
1347 | ||
1348 | void choose_args_clear() { | |
1349 | for (auto w : choose_args) | |
1350 | destroy_choose_args(w.second); | |
1351 | choose_args.clear(); | |
1352 | } | |
1353 | ||
1354 | template<typename WeightVector> | |
1355 | void do_rule(int rule, int x, vector<int>& out, int maxout, | |
1356 | const WeightVector& weight, | |
1357 | uint64_t choose_args_index) const { | |
1358 | int rawout[maxout]; | |
1359 | char work[crush_work_size(crush, maxout)]; | |
1360 | crush_init_workspace(crush, work); | |
1361 | crush_choose_arg_map arg_map = choose_args_get(choose_args_index); | |
1362 | int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0], | |
1363 | weight.size(), work, arg_map.args); | |
1364 | if (numrep < 0) | |
1365 | numrep = 0; | |
1366 | out.resize(numrep); | |
1367 | for (int i=0; i<numrep; i++) | |
1368 | out[i] = rawout[i]; | |
1369 | } | |
1370 | ||
1371 | int _choose_type_stack( | |
1372 | CephContext *cct, | |
1373 | const vector<pair<int,int>>& stack, | |
1374 | const set<int>& overfull, | |
1375 | const vector<int>& underfull, | |
1376 | const vector<int>& orig, | |
1377 | vector<int>::const_iterator& i, | |
1378 | set<int>& used, | |
1379 | vector<int> *pw) const; | |
1380 | ||
1381 | int try_remap_rule( | |
1382 | CephContext *cct, | |
1383 | int rule, | |
1384 | int maxout, | |
1385 | const set<int>& overfull, | |
1386 | const vector<int>& underfull, | |
1387 | const vector<int>& orig, | |
1388 | vector<int> *out) const; | |
1389 | ||
1390 | bool check_crush_rule(int ruleset, int type, int size, ostream& ss) { | |
1391 | assert(crush); | |
1392 | ||
1393 | __u32 i; | |
1394 | for (i = 0; i < crush->max_rules; i++) { | |
1395 | if (crush->rules[i] && | |
1396 | crush->rules[i]->mask.ruleset == ruleset && | |
1397 | crush->rules[i]->mask.type == type) { | |
1398 | ||
1399 | if (crush->rules[i]->mask.min_size <= size && | |
1400 | crush->rules[i]->mask.max_size >= size) { | |
1401 | return true; | |
1402 | } else if (size < crush->rules[i]->mask.min_size) { | |
1403 | ss << "pool size is smaller than the crush rule min size"; | |
1404 | return false; | |
1405 | } else { | |
1406 | ss << "pool size is bigger than the crush rule max size"; | |
1407 | return false; | |
1408 | } | |
1409 | } | |
1410 | } | |
1411 | ||
1412 | return false; | |
1413 | } | |
1414 | ||
1415 | void encode(bufferlist &bl, uint64_t features) const; | |
1416 | void decode(bufferlist::iterator &blp); | |
1417 | void decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp); | |
1418 | void dump(Formatter *f) const; | |
1419 | void dump_rules(Formatter *f) const; | |
1420 | void dump_rule(int ruleset, Formatter *f) const; | |
1421 | void dump_tunables(Formatter *f) const; | |
1422 | void dump_choose_args(Formatter *f) const; | |
1423 | void list_rules(Formatter *f) const; | |
1424 | void dump_tree(ostream *out, Formatter *f) const; | |
1425 | void dump_tree(Formatter *f) const; | |
1426 | static void generate_test_instances(list<CrushWrapper*>& o); | |
1427 | ||
7c673cae FG |
1428 | int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct); |
1429 | ||
1430 | static bool is_valid_crush_name(const string& s); | |
1431 | static bool is_valid_crush_loc(CephContext *cct, | |
1432 | const map<string,string>& loc); | |
1433 | }; | |
1434 | WRITE_CLASS_ENCODER_FEATURES(CrushWrapper) | |
1435 | ||
1436 | #endif |