]> git.proxmox.com Git - ceph.git/blame - ceph/src/crush/CrushCompiler.cc
update sources to 12.2.7
[ceph.git] / ceph / src / crush / CrushCompiler.cc
CommitLineData
7c673cae
FG
1
2#include "CrushCompiler.h"
3
4#if defined(_AIX)
5#define EBADE ECORRUPT
6#endif
7
8#ifndef EBADE
9#define EBADE EFTYPE
10#endif
7c673cae 11#include <string>
7c673cae
FG
12#include "common/errno.h"
13#include <boost/algorithm/string.hpp>
14
15// -------------
16
17static void print_type_name(ostream& out, int t, CrushWrapper &crush)
18{
19 const char *name = crush.get_type_name(t);
20 if (name)
21 out << name;
22 else if (t == 0)
23 out << "device";
24 else
25 out << "type" << t;
26}
27
28static void print_item_name(ostream& out, int t, CrushWrapper &crush)
29{
30 const char *name = crush.get_item_name(t);
31 if (name)
32 out << name;
33 else if (t >= 0)
34 out << "device" << t;
35 else
36 out << "bucket" << (-1-t);
37}
38
39static void print_bucket_class_ids(ostream& out, int t, CrushWrapper &crush)
40{
41 if (crush.class_bucket.count(t) == 0)
42 return;
43 auto &class_to_id = crush.class_bucket[t];
44 for (auto &i : class_to_id) {
45 int c = i.first;
46 int cid = i.second;
47 const char* class_name = crush.get_class_name(c);
48 assert(class_name);
49 out << "\tid " << cid << " class " << class_name << "\t\t# do not change unnecessarily\n";
50 }
51}
52
53static void print_item_class(ostream& out, int t, CrushWrapper &crush)
54{
55 const char *c = crush.get_item_class(t);
56 if (c)
57 out << " class " << c;
58}
59
60static void print_class(ostream& out, int t, CrushWrapper &crush)
61{
62 const char *c = crush.get_class_name(t);
63 if (c)
64 out << " class " << c;
65 else
66 out << " # unexpected class " << t;
67}
68
69static void print_rule_name(ostream& out, int t, CrushWrapper &crush)
70{
71 const char *name = crush.get_rule_name(t);
72 if (name)
73 out << name;
74 else
75 out << "rule" << t;
76}
77
78static void print_fixedpoint(ostream& out, int i)
79{
80 char s[20];
81 snprintf(s, sizeof(s), "%.3f", (float)i / (float)0x10000);
82 out << s;
83}
84
85int CrushCompiler::decompile_bucket_impl(int i, ostream &out)
86{
87 const char *name = crush.get_item_name(i);
88 if (name && !crush.is_valid_crush_name(name))
89 return 0;
90 int type = crush.get_bucket_type(i);
91 print_type_name(out, type, crush);
92 out << " ";
93 print_item_name(out, i, crush);
94 out << " {\n";
95 out << "\tid " << i << "\t\t# do not change unnecessarily\n";
96 print_bucket_class_ids(out, i, crush);
97
98 out << "\t# weight ";
99 print_fixedpoint(out, crush.get_bucket_weight(i));
100 out << "\n";
101
102 int n = crush.get_bucket_size(i);
103
104 int alg = crush.get_bucket_alg(i);
105 out << "\talg " << crush_bucket_alg_name(alg);
106
107 // notate based on alg type
108 bool dopos = false;
109 switch (alg) {
110 case CRUSH_BUCKET_UNIFORM:
111 out << "\t# do not change bucket size (" << n << ") unnecessarily";
112 dopos = true;
113 break;
114 case CRUSH_BUCKET_LIST:
115 out << "\t# add new items at the end; do not change order unnecessarily";
116 break;
117 case CRUSH_BUCKET_TREE:
118 out << "\t# do not change pos for existing items unnecessarily";
119 dopos = true;
120 break;
121 }
122 out << "\n";
123
124 int hash = crush.get_bucket_hash(i);
125 out << "\thash " << hash << "\t# " << crush_hash_name(hash) << "\n";
126
127 for (int j=0; j<n; j++) {
128 int item = crush.get_bucket_item(i, j);
129 int w = crush.get_bucket_item_weight(i, j);
130 out << "\titem ";
131 print_item_name(out, item, crush);
132 out << " weight ";
133 print_fixedpoint(out, w);
134 if (dopos)
135 out << " pos " << j;
136
137 out << "\n";
138 }
139 out << "}\n";
140 return 0;
141}
142
143/* Basically, we just descend recursively into all of the buckets,
144 * executing a depth-first traversal of the graph. Since the buckets form a
145 * directed acyclic graph, this should work just fine. The graph isn't
146 * necessarily a tree, so we have to keep track of what buckets we already
147 * outputted. We don't want to output anything twice. We also keep track of
148 * what buckets are in progress so that we can detect cycles. These can
149 * arise through user error.
150 */
151int CrushCompiler::decompile_bucket(int cur,
152 std::map<int, dcb_state_t>& dcb_states,
153 ostream &out)
154{
155 if ((cur == 0) || (!crush.bucket_exists(cur)))
156 return 0;
157
158 std::map<int, dcb_state_t>::iterator c = dcb_states.find(cur);
159 if (c == dcb_states.end()) {
160 // Mark this bucket as "in progress."
161 std::map<int, dcb_state_t>::value_type val(cur, DCB_STATE_IN_PROGRESS);
162 std::pair <std::map<int, dcb_state_t>::iterator, bool> rval
163 (dcb_states.insert(val));
164 assert(rval.second);
165 c = rval.first;
166 }
167 else if (c->second == DCB_STATE_DONE) {
168 // We already did this bucket.
169 return 0;
170 }
171 else if (c->second == DCB_STATE_IN_PROGRESS) {
172 err << "decompile_crush_bucket: logic error: tried to decompile "
173 "a bucket that is already being decompiled" << std::endl;
174 return -EBADE;
175 }
176 else {
177 err << "decompile_crush_bucket: logic error: illegal bucket state! "
178 << c->second << std::endl;
179 return -EBADE;
180 }
181
182 int bsize = crush.get_bucket_size(cur);
183 for (int i = 0; i < bsize; ++i) {
184 int item = crush.get_bucket_item(cur, i);
185 std::map<int, dcb_state_t>::iterator d = dcb_states.find(item);
186 if (d == dcb_states.end()) {
187 int ret = decompile_bucket(item, dcb_states, out);
188 if (ret)
189 return ret;
190 }
191 else if (d->second == DCB_STATE_IN_PROGRESS) {
192 err << "decompile_crush_bucket: error: while trying to output bucket "
193 << cur << ", we found out that it contains one of the buckets that "
194 << "contain it. This is not allowed. The buckets must form a "
195 << "directed acyclic graph." << std::endl;
196 return -EINVAL;
197 }
198 else if (d->second != DCB_STATE_DONE) {
199 err << "decompile_crush_bucket: logic error: illegal bucket state "
200 << d->second << std::endl;
201 return -EBADE;
202 }
203 }
204 decompile_bucket_impl(cur, out);
205 c->second = DCB_STATE_DONE;
206 return 0;
207}
208
209int CrushCompiler::decompile_weight_set_weights(crush_weight_set weight_set,
210 ostream &out)
211{
212 out << " [ ";
213 for (__u32 i = 0; i < weight_set.size; i++) {
214 print_fixedpoint(out, weight_set.weights[i]);
215 out << " ";
216 }
217 out << "]\n";
218 return 0;
219}
220
221int CrushCompiler::decompile_weight_set(crush_weight_set *weight_set,
222 __u32 size,
223 ostream &out)
224{
225 out << " weight_set [\n";
226 for (__u32 i = 0; i < size; i++) {
227 int r = decompile_weight_set_weights(weight_set[i], out);
228 if (r < 0)
229 return r;
230 }
231 out << " ]\n";
232 return 0;
233}
234
224ce89b 235int CrushCompiler::decompile_ids(__s32 *ids,
7c673cae
FG
236 __u32 size,
237 ostream &out)
238{
239 out << " ids [ ";
240 for (__u32 i = 0; i < size; i++)
241 out << ids[i] << " ";
242 out << "]\n";
243 return 0;
244}
245
246int CrushCompiler::decompile_choose_arg(crush_choose_arg *arg,
247 int bucket_id,
248 ostream &out)
249{
250 int r;
251 out << " {\n";
252 out << " bucket_id " << bucket_id << "\n";
28e407b8
AA
253 if (arg->weight_set_positions > 0) {
254 r = decompile_weight_set(arg->weight_set, arg->weight_set_positions, out);
7c673cae
FG
255 if (r < 0)
256 return r;
257 }
258 if (arg->ids_size > 0) {
259 r = decompile_ids(arg->ids, arg->ids_size, out);
260 if (r < 0)
261 return r;
262 }
263 out << " }\n";
264 return 0;
265}
266
267int CrushCompiler::decompile_choose_arg_map(crush_choose_arg_map arg_map,
268 ostream &out)
269{
270 for (__u32 i = 0; i < arg_map.size; i++) {
271 if ((arg_map.args[i].ids_size == 0) &&
28e407b8 272 (arg_map.args[i].weight_set_positions == 0))
7c673cae
FG
273 continue;
274 int r = decompile_choose_arg(&arg_map.args[i], -1-i, out);
275 if (r < 0)
276 return r;
277 }
278 return 0;
279}
280
281int CrushCompiler::decompile_choose_args(const std::pair<const long unsigned int, crush_choose_arg_map> &i,
282 ostream &out)
283{
284 out << "choose_args " << i.first << " {\n";
285 int r = decompile_choose_arg_map(i.second, out);
286 if (r < 0)
287 return r;
288 out << "}\n";
289 return 0;
290}
291
292int CrushCompiler::decompile(ostream &out)
293{
7c673cae
FG
294 out << "# begin crush map\n";
295
296 // only dump tunables if they differ from the defaults
297 if (crush.get_choose_local_tries() != 2)
298 out << "tunable choose_local_tries " << crush.get_choose_local_tries() << "\n";
299 if (crush.get_choose_local_fallback_tries() != 5)
300 out << "tunable choose_local_fallback_tries " << crush.get_choose_local_fallback_tries() << "\n";
301 if (crush.get_choose_total_tries() != 19)
302 out << "tunable choose_total_tries " << crush.get_choose_total_tries() << "\n";
303 if (crush.get_chooseleaf_descend_once() != 0)
304 out << "tunable chooseleaf_descend_once " << crush.get_chooseleaf_descend_once() << "\n";
305 if (crush.get_chooseleaf_vary_r() != 0)
306 out << "tunable chooseleaf_vary_r " << crush.get_chooseleaf_vary_r() << "\n";
307 if (crush.get_chooseleaf_stable() != 0)
308 out << "tunable chooseleaf_stable " << crush.get_chooseleaf_stable() << "\n";
309 if (crush.get_straw_calc_version() != 0)
310 out << "tunable straw_calc_version " << crush.get_straw_calc_version() << "\n";
311 if (crush.get_allowed_bucket_algs() != CRUSH_LEGACY_ALLOWED_BUCKET_ALGS)
312 out << "tunable allowed_bucket_algs " << crush.get_allowed_bucket_algs()
313 << "\n";
314
315 out << "\n# devices\n";
316 for (int i=0; i<crush.get_max_devices(); i++) {
b32b8144
FG
317 const char *name = crush.get_item_name(i);
318 if (name) {
319 out << "device " << i << " " << name;
320 print_item_class(out, i, crush);
321 out << "\n";
322 }
7c673cae
FG
323 }
324
325 out << "\n# types\n";
326 int n = crush.get_num_type_names();
327 for (int i=0; n; i++) {
328 const char *name = crush.get_type_name(i);
329 if (!name) {
330 if (i == 0) out << "type 0 osd\n";
331 continue;
332 }
333 n--;
334 out << "type " << i << " " << name << "\n";
335 }
336
337 out << "\n# buckets\n";
338 std::map<int, dcb_state_t> dcb_states;
339 for (int bucket = -1; bucket > -1-crush.get_max_buckets(); --bucket) {
340 int ret = decompile_bucket(bucket, dcb_states, out);
341 if (ret)
342 return ret;
343 }
344
345 out << "\n# rules\n";
346 for (int i=0; i<crush.get_max_rules(); i++) {
347 if (!crush.rule_exists(i))
348 continue;
349 out << "rule ";
350 if (crush.get_rule_name(i))
351 print_rule_name(out, i, crush);
352 out << " {\n";
c07f9fc5
FG
353 out << "\tid " << i << "\n";
354 if (i != crush.get_rule_mask_ruleset(i)) {
355 out << "\t# WARNING: ruleset " << crush.get_rule_mask_ruleset(i) << " != id " << i << "; this will not recompile to the same map\n";
356 }
7c673cae
FG
357
358 switch (crush.get_rule_mask_type(i)) {
359 case CEPH_PG_TYPE_REPLICATED:
360 out << "\ttype replicated\n";
361 break;
362 case CEPH_PG_TYPE_ERASURE:
363 out << "\ttype erasure\n";
364 break;
365 default:
366 out << "\ttype " << crush.get_rule_mask_type(i) << "\n";
367 }
368
369 out << "\tmin_size " << crush.get_rule_mask_min_size(i) << "\n";
370 out << "\tmax_size " << crush.get_rule_mask_max_size(i) << "\n";
371
372 for (int j=0; j<crush.get_rule_len(i); j++) {
373 switch (crush.get_rule_op(i, j)) {
374 case CRUSH_RULE_NOOP:
375 out << "\tstep noop\n";
376 break;
377 case CRUSH_RULE_TAKE:
378 out << "\tstep take ";
379 {
380 int step_item = crush.get_rule_arg1(i, j);
381 int original_item;
382 int c;
383 int res = crush.split_id_class(step_item, &original_item, &c);
384 if (res < 0)
385 return res;
386 if (c >= 0)
387 step_item = original_item;
388 print_item_name(out, step_item, crush);
389 if (c >= 0)
390 print_class(out, c, crush);
391 }
392 out << "\n";
393 break;
394 case CRUSH_RULE_EMIT:
395 out << "\tstep emit\n";
396 break;
397 case CRUSH_RULE_SET_CHOOSE_TRIES:
398 out << "\tstep set_choose_tries " << crush.get_rule_arg1(i, j)
399 << "\n";
400 break;
401 case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
402 out << "\tstep set_choose_local_tries " << crush.get_rule_arg1(i, j)
403 << "\n";
404 break;
405 case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
406 out << "\tstep set_choose_local_fallback_tries " << crush.get_rule_arg1(i, j)
407 << "\n";
408 break;
409 case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
410 out << "\tstep set_chooseleaf_tries " << crush.get_rule_arg1(i, j)
411 << "\n";
412 break;
413 case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
414 out << "\tstep set_chooseleaf_vary_r " << crush.get_rule_arg1(i, j)
415 << "\n";
416 break;
417 case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
418 out << "\tstep set_chooseleaf_stable " << crush.get_rule_arg1(i, j)
419 << "\n";
420 break;
421 case CRUSH_RULE_CHOOSE_FIRSTN:
422 out << "\tstep choose firstn "
423 << crush.get_rule_arg1(i, j)
424 << " type ";
425 print_type_name(out, crush.get_rule_arg2(i, j), crush);
426 out << "\n";
427 break;
428 case CRUSH_RULE_CHOOSE_INDEP:
429 out << "\tstep choose indep "
430 << crush.get_rule_arg1(i, j)
431 << " type ";
432 print_type_name(out, crush.get_rule_arg2(i, j), crush);
433 out << "\n";
434 break;
435 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
436 out << "\tstep chooseleaf firstn "
437 << crush.get_rule_arg1(i, j)
438 << " type ";
439 print_type_name(out, crush.get_rule_arg2(i, j), crush);
440 out << "\n";
441 break;
442 case CRUSH_RULE_CHOOSELEAF_INDEP:
443 out << "\tstep chooseleaf indep "
444 << crush.get_rule_arg1(i, j)
445 << " type ";
446 print_type_name(out, crush.get_rule_arg2(i, j), crush);
447 out << "\n";
448 break;
449 }
450 }
451 out << "}\n";
452 }
453 if (crush.choose_args.size() > 0) {
454 out << "\n# choose_args\n";
455 for (auto i : crush.choose_args) {
456 int ret = decompile_choose_args(i, out);
457 if (ret)
458 return ret;
459 }
460 }
461 out << "\n# end crush map" << std::endl;
462 return 0;
463}
464
465
466// ================================================================
467
468string CrushCompiler::string_node(node_t &node)
469{
470 return boost::trim_copy(string(node.value.begin(), node.value.end()));
471}
472
473int CrushCompiler::int_node(node_t &node)
474{
475 string str = string_node(node);
476 return strtol(str.c_str(), 0, 10);
477}
478
479float CrushCompiler::float_node(node_t &node)
480{
481 string s = string_node(node);
482 return strtof(s.c_str(), 0);
483}
484
485int CrushCompiler::parse_device(iter_t const& i)
486{
487 int id = int_node(i->children[1]);
488
489 string name = string_node(i->children[2]);
490 crush.set_item_name(id, name.c_str());
491 if (item_id.count(name)) {
492 err << "item " << name << " defined twice" << std::endl;
493 return -1;
494 }
495 item_id[name] = id;
496 id_item[id] = name;
497
498 if (verbose) err << "device " << id << " '" << name << "'";
499
500 if (i->children.size() > 3) {
501 string c = string_node(i->children[4]);
502 crush.set_item_class(id, c);
503 if (verbose) err << " class" << " '" << c << "'" << std::endl;
504 } else {
505 if (verbose) err << std::endl;
506 }
507 return 0;
508}
509
510int CrushCompiler::parse_tunable(iter_t const& i)
511{
512 string name = string_node(i->children[1]);
513 int val = int_node(i->children[2]);
514
515 if (name == "choose_local_tries")
516 crush.set_choose_local_tries(val);
517 else if (name == "choose_local_fallback_tries")
518 crush.set_choose_local_fallback_tries(val);
519 else if (name == "choose_total_tries")
520 crush.set_choose_total_tries(val);
521 else if (name == "chooseleaf_descend_once")
522 crush.set_chooseleaf_descend_once(val);
523 else if (name == "chooseleaf_vary_r")
524 crush.set_chooseleaf_vary_r(val);
525 else if (name == "chooseleaf_stable")
526 crush.set_chooseleaf_stable(val);
527 else if (name == "straw_calc_version")
528 crush.set_straw_calc_version(val);
529 else if (name == "allowed_bucket_algs")
530 crush.set_allowed_bucket_algs(val);
531 else {
532 err << "tunable " << name << " not recognized" << std::endl;
533 return -1;
534 }
535
536 /*
537
538 current crop of tunables are all now "safe". re-enable this when we
539 add new ones that are ... new.
540
541 if (!unsafe_tunables) {
542 err << "tunables are NOT FULLY IMPLEMENTED; enable with --enable-unsafe-tunables to enable this feature" << std::endl;
543 return -1;
544 }
545 */
546
547 if (verbose) err << "tunable " << name << " " << val << std::endl;
548 return 0;
549}
550
551int CrushCompiler::parse_bucket_type(iter_t const& i)
552{
553 int id = int_node(i->children[1]);
554 string name = string_node(i->children[2]);
555 if (verbose) err << "type " << id << " '" << name << "'" << std::endl;
556 type_id[name] = id;
557 crush.set_type_name(id, name.c_str());
558 return 0;
559}
560
561int CrushCompiler::parse_bucket(iter_t const& i)
562{
563 string tname = string_node(i->children[0]);
564 if (!type_id.count(tname)) {
565 err << "bucket type '" << tname << "' is not defined" << std::endl;
566 return -1;
567 }
568 int type = type_id[tname];
569
570 string name = string_node(i->children[1]);
571 if (item_id.count(name)) {
572 err << "bucket or device '" << name << "' is already defined" << std::endl;
573 return -1;
574 }
575
576 int id = 0; // none, yet!
577 int alg = -1;
578 int hash = 0;
579 set<int> used_items;
580 int size = 0;
581 map<int32_t, int32_t> class_id;
582
583 for (unsigned p=3; p<i->children.size()-1; p++) {
584 iter_t sub = i->children.begin() + p;
585 string tag = string_node(sub->children[0]);
586 //err << "tag " << tag << std::endl;
587 if (tag == "id") {
588 int maybe_id = int_node(sub->children[1]);
589 if (verbose) err << "bucket " << name << " id " << maybe_id;
590 if (sub->children.size() > 2) {
591 string class_name = string_node(sub->children[3]);
b5b8bbf5
FG
592 // note that we do not verify class existence here,
593 // as this bucket might come from an empty shadow tree
594 // which currently has no OSDs but is still referenced by a rule!
595 int cid = crush.get_or_create_class_id(class_name);
7c673cae
FG
596 if (class_id.count(cid) != 0) {
597 err << "duplicate device class " << class_name << " for bucket " << name << std::endl;
598 return -ERANGE;
599 }
600 class_id[cid] = maybe_id;
601 if (verbose) err << " class" << " '" << class_name << "'" << std::endl;
602 } else {
603 id = maybe_id;
604 if (verbose) err << std::endl;
605 }
606 } else if (tag == "alg") {
607 string a = string_node(sub->children[1]);
608 if (a == "uniform")
609 alg = CRUSH_BUCKET_UNIFORM;
610 else if (a == "list")
611 alg = CRUSH_BUCKET_LIST;
612 else if (a == "tree")
613 alg = CRUSH_BUCKET_TREE;
614 else if (a == "straw")
615 alg = CRUSH_BUCKET_STRAW;
616 else if (a == "straw2")
617 alg = CRUSH_BUCKET_STRAW2;
618 else {
619 err << "unknown bucket alg '" << a << "'" << std::endl << std::endl;
620 return -EINVAL;
621 }
622 }
623 else if (tag == "hash") {
624 string a = string_node(sub->children[1]);
625 if (a == "rjenkins1")
626 hash = CRUSH_HASH_RJENKINS1;
627 else
628 hash = atoi(a.c_str());
629 }
630 else if (tag == "item") {
631 // first, just determine which item pos's are already used
632 size++;
633 for (unsigned q = 2; q < sub->children.size(); q++) {
634 string tag = string_node(sub->children[q++]);
635 if (tag == "pos") {
636 int pos = int_node(sub->children[q]);
637 if (used_items.count(pos)) {
638 err << "item '" << string_node(sub->children[1]) << "' in bucket '" << name << "' has explicit pos " << pos << ", which is occupied" << std::endl;
639 return -1;
640 }
641 used_items.insert(pos);
642 }
643 }
644 }
645 else ceph_abort();
646 }
647
648 // now do the items.
649 if (!used_items.empty())
650 size = MAX(size, *used_items.rbegin());
651 vector<int> items(size);
652 vector<int> weights(size);
653
654 int curpos = 0;
655 unsigned bucketweight = 0;
656 bool have_uniform_weight = false;
657 unsigned uniform_weight = 0;
658 for (unsigned p=3; p<i->children.size()-1; p++) {
659 iter_t sub = i->children.begin() + p;
660 string tag = string_node(sub->children[0]);
661 if (tag == "item") {
662
663 string iname = string_node(sub->children[1]);
664 if (!item_id.count(iname)) {
665 err << "item '" << iname << "' in bucket '" << name << "' is not defined" << std::endl;
666 return -1;
667 }
668 int itemid = item_id[iname];
669
670 unsigned weight = 0x10000;
671 if (item_weight.count(itemid))
672 weight = item_weight[itemid];
673
674 int pos = -1;
675 for (unsigned q = 2; q < sub->children.size(); q++) {
676 string tag = string_node(sub->children[q++]);
677 if (tag == "weight") {
678 weight = float_node(sub->children[q]) * (float)0x10000;
679 if (weight > CRUSH_MAX_DEVICE_WEIGHT && itemid >= 0) {
680 err << "device weight limited to " << CRUSH_MAX_DEVICE_WEIGHT / 0x10000 << std::endl;
681 return -ERANGE;
682 }
683 else if (weight > CRUSH_MAX_BUCKET_WEIGHT && itemid < 0) {
684 err << "bucket weight limited to " << CRUSH_MAX_BUCKET_WEIGHT / 0x10000
685 << " to prevent overflow" << std::endl;
686 return -ERANGE;
687 }
688 }
689 else if (tag == "pos")
690 pos = int_node(sub->children[q]);
691 else
692 ceph_abort();
693
694 }
695 if (alg == CRUSH_BUCKET_UNIFORM) {
696 if (!have_uniform_weight) {
697 have_uniform_weight = true;
698 uniform_weight = weight;
699 } else {
700 if (uniform_weight != weight) {
701 err << "item '" << iname << "' in uniform bucket '" << name << "' has weight " << weight
702 << " but previous item(s) have weight " << (float)uniform_weight/(float)0x10000
703 << "; uniform bucket items must all have identical weights." << std::endl;
704 return -1;
705 }
706 }
707 }
708
709 if (pos >= size) {
710 err << "item '" << iname << "' in bucket '" << name << "' has pos " << pos << " >= size " << size << std::endl;
711 return -1;
712 }
713 if (pos < 0) {
714 while (used_items.count(curpos)) curpos++;
715 pos = curpos++;
716 }
717 //err << " item " << iname << " (" << itemid << ") pos " << pos << " weight " << weight << std::endl;
718 items[pos] = itemid;
719 weights[pos] = weight;
720
721 if (crush_addition_is_unsafe(bucketweight, weight)) {
722 err << "oh no! our bucket weights are overflowing all over the place, better lower the item weights" << std::endl;
723 return -ERANGE;
724 }
725
726 bucketweight += weight;
727 }
728 }
729
730 if (id == 0) {
731 for (id=-1; id_item.count(id); id--) ;
732 //err << "assigned id " << id << std::endl;
733 }
734
735 for (auto &i : class_id)
d2e6a577 736 class_bucket[id][i.first] = i.second;
7c673cae
FG
737
738 if (verbose) err << "bucket " << name << " (" << id << ") " << size << " items and weight "
739 << (float)bucketweight / (float)0x10000 << std::endl;
740 id_item[id] = name;
741 item_id[name] = id;
742 item_weight[id] = bucketweight;
743
744 assert(id != 0);
b5b8bbf5
FG
745 int idout;
746 int r = crush.add_bucket(id, alg, hash, type, size,
747 &items[0], &weights[0], &idout);
7c673cae
FG
748 if (r < 0) {
749 if (r == -EEXIST)
750 err << "Duplicate bucket id " << id << std::endl;
751 else
752 err << "add_bucket failed " << cpp_strerror(r) << std::endl;
753 return r;
754 }
755 r = crush.set_item_name(id, name.c_str());
756 return r;
757}
758
759int CrushCompiler::parse_rule(iter_t const& i)
760{
7c673cae
FG
761 int start; // rule name is optional!
762
763 string rname = string_node(i->children[1]);
764 if (rname != "{") {
765 if (rule_id.count(rname)) {
766 err << "rule name '" << rname << "' already defined\n" << std::endl;
767 return -1;
768 }
769 start = 4;
770 } else {
771 rname = string();
772 start = 3;
773 }
774
c07f9fc5 775 int ruleno = int_node(i->children[start]);
7c673cae
FG
776
777 string tname = string_node(i->children[start+2]);
778 int type;
779 if (tname == "replicated")
780 type = CEPH_PG_TYPE_REPLICATED;
781 else if (tname == "erasure")
782 type = CEPH_PG_TYPE_ERASURE;
783 else
784 ceph_abort();
785
786 int minsize = int_node(i->children[start+4]);
787 int maxsize = int_node(i->children[start+6]);
788
789 int steps = i->children.size() - start - 8;
790 //err << "num steps " << steps << std::endl;
c07f9fc5
FG
791
792 if (crush.rule_exists(ruleno)) {
793 err << "rule " << ruleno << " already exists" << std::endl;
794 return -1;
795 }
796 int r = crush.add_rule(ruleno, steps, type, minsize, maxsize);
797 if (r != ruleno) {
798 err << "unable to add rule id " << ruleno << " for rule '" << rname
799 << "'" << std::endl;
800 return -1;
801 }
7c673cae
FG
802 if (rname.length()) {
803 crush.set_rule_name(ruleno, rname.c_str());
804 rule_id[rname] = ruleno;
805 }
806
807 int step = 0;
808 for (iter_t p = i->children.begin() + start + 7; step < steps; p++) {
809 iter_t s = p->children.begin() + 1;
810 int stepid = s->value.id().to_long();
811 switch (stepid) {
812 case crush_grammar::_step_take:
813 {
814 string item = string_node(s->children[1]);
815 if (!item_id.count(item)) {
816 err << "in rule '" << rname << "' item '" << item << "' not defined" << std::endl;
817 return -1;
818 }
819 int id = item_id[item];
820 int c = -1;
821 string class_name;
822 if (s->children.size() > 2) {
823 class_name = string_node(s->children[3]);
824 c = crush.get_class_id(class_name);
825 if (c < 0)
826 return c;
827 if (crush.class_bucket.count(id) == 0) {
828 err << "in rule '" << rname << "' step take " << item
829 << " has no class information" << std::endl;
830 return -EINVAL;
831 }
832 if (crush.class_bucket[id].count(c) == 0) {
833 err << "in rule '" << rname << "' step take " << item
834 << " no matching bucket for class " << class_name << std::endl;
835 return -EINVAL;
836 }
837 id = crush.class_bucket[id][c];
838 }
839 if (verbose) {
840 err << "rule " << rname << " take " << item;
841 if (c < 0)
842 err << std::endl;
843 else
844 err << " remapped to " << crush.get_item_name(id) << std::endl;
845 }
846
847 crush.set_rule_step_take(ruleno, step++, id);
848 }
849 break;
850
851 case crush_grammar::_step_set_choose_tries:
852 {
853 int val = int_node(s->children[1]);
854 crush.set_rule_step_set_choose_tries(ruleno, step++, val);
855 }
856 break;
857
858 case crush_grammar::_step_set_choose_local_tries:
859 {
860 int val = int_node(s->children[1]);
861 crush.set_rule_step_set_choose_local_tries(ruleno, step++, val);
862 }
863 break;
864
865 case crush_grammar::_step_set_choose_local_fallback_tries:
866 {
867 int val = int_node(s->children[1]);
868 crush.set_rule_step_set_choose_local_fallback_tries(ruleno, step++, val);
869 }
870 break;
871
872 case crush_grammar::_step_set_chooseleaf_tries:
873 {
874 int val = int_node(s->children[1]);
875 crush.set_rule_step_set_chooseleaf_tries(ruleno, step++, val);
876 }
877 break;
878
879 case crush_grammar::_step_set_chooseleaf_vary_r:
880 {
881 int val = int_node(s->children[1]);
882 crush.set_rule_step_set_chooseleaf_vary_r(ruleno, step++, val);
883 }
884 break;
885
886 case crush_grammar::_step_set_chooseleaf_stable:
887 {
888 int val = int_node(s->children[1]);
889 crush.set_rule_step_set_chooseleaf_stable(ruleno, step++, val);
890 }
891 break;
892
893 case crush_grammar::_step_choose:
894 case crush_grammar::_step_chooseleaf:
895 {
896 string type = string_node(s->children[4]);
897 if (!type_id.count(type)) {
898 err << "in rule '" << rname << "' type '" << type << "' not defined" << std::endl;
899 return -1;
900 }
901 string choose = string_node(s->children[0]);
902 string mode = string_node(s->children[1]);
903 if (choose == "choose") {
904 if (mode == "firstn")
905 crush.set_rule_step_choose_firstn(ruleno, step++, int_node(s->children[2]), type_id[type]);
906 else if (mode == "indep")
907 crush.set_rule_step_choose_indep(ruleno, step++, int_node(s->children[2]), type_id[type]);
908 else ceph_abort();
909 } else if (choose == "chooseleaf") {
910 if (mode == "firstn")
911 crush.set_rule_step_choose_leaf_firstn(ruleno, step++, int_node(s->children[2]), type_id[type]);
912 else if (mode == "indep")
913 crush.set_rule_step_choose_leaf_indep(ruleno, step++, int_node(s->children[2]), type_id[type]);
914 else ceph_abort();
915 } else ceph_abort();
916 }
917 break;
918
919 case crush_grammar::_step_emit:
920 crush.set_rule_step_emit(ruleno, step++);
921 break;
922
923 default:
924 err << "bad crush step " << stepid << std::endl;
925 return -1;
926 }
927 }
928 assert(step == steps);
929 return 0;
930}
931
932int CrushCompiler::parse_weight_set_weights(iter_t const& i, int bucket_id, crush_weight_set *weight_set)
933{
934 // -2 for the enclosing [ ]
935 __u32 size = i->children.size() - 2;
936 __u32 bucket_size = crush.get_bucket_size(bucket_id);
937 if (size != bucket_size) {
938 err << bucket_id << " needs exactly " << bucket_size
939 << " weights but got " << size << std::endl;
940 return -1;
941 }
942 weight_set->size = size;
943 weight_set->weights = (__u32 *)calloc(weight_set->size, sizeof(__u32));
944 __u32 pos = 0;
945 for (iter_t p = i->children.begin() + 1; p != i->children.end(); p++, pos++)
946 if (pos < size)
947 weight_set->weights[pos] = float_node(*p) * (float)0x10000;
948 return 0;
949}
950
951int CrushCompiler::parse_weight_set(iter_t const& i, int bucket_id, crush_choose_arg *arg)
952{
953 // -3 stands for the leading "weight_set" keyword and the enclosing [ ]
28e407b8
AA
954 arg->weight_set_positions = i->children.size() - 3;
955 arg->weight_set = (crush_weight_set *)calloc(arg->weight_set_positions, sizeof(crush_weight_set));
7c673cae
FG
956 __u32 pos = 0;
957 for (iter_t p = i->children.begin(); p != i->children.end(); p++) {
958 int r = 0;
959 switch((int)p->value.id().to_long()) {
960 case crush_grammar::_weight_set_weights:
28e407b8 961 if (pos < arg->weight_set_positions) {
7c673cae
FG
962 r = parse_weight_set_weights(p, bucket_id, &arg->weight_set[pos]);
963 pos++;
964 } else {
965 err << "invalid weight_set syntax" << std::endl;
966 r = -1;
967 }
968 }
969 if (r < 0)
970 return r;
971 }
972 return 0;
973}
974
975int CrushCompiler::parse_choose_arg_ids(iter_t const& i, int bucket_id, crush_choose_arg *arg)
976{
977 // -3 for the leading "ids" keyword and the enclosing [ ]
978 __u32 size = i->children.size() - 3;
979 __u32 bucket_size = crush.get_bucket_size(bucket_id);
980 if (size != bucket_size) {
981 err << bucket_id << " needs exactly " << bucket_size
982 << " ids but got " << size << std::endl;
983 return -1;
984 }
985 arg->ids_size = size;
224ce89b 986 arg->ids = (__s32 *)calloc(arg->ids_size, sizeof(__s32));
7c673cae
FG
987 __u32 pos = 0;
988 for (iter_t p = i->children.begin() + 2; pos < size; p++, pos++)
989 arg->ids[pos] = int_node(*p);
990 return 0;
991}
992
993int CrushCompiler::parse_choose_arg(iter_t const& i, crush_choose_arg *args)
994{
995 int bucket_id = int_node(i->children[2]);
996 if (-1-bucket_id < 0 || -1-bucket_id >= crush.get_max_buckets()) {
997 err << bucket_id << " is out of range" << std::endl;
998 return -1;
999 }
1000 if (!crush.bucket_exists(bucket_id)) {
1001 err << bucket_id << " does not exist" << std::endl;
1002 return -1;
1003 }
1004 crush_choose_arg *arg = &args[-1-bucket_id];
1005 for (iter_t p = i->children.begin(); p != i->children.end(); p++) {
1006 int r = 0;
1007 switch((int)p->value.id().to_long()) {
1008 case crush_grammar::_weight_set:
1009 r = parse_weight_set(p, bucket_id, arg);
1010 break;
1011 case crush_grammar::_choose_arg_ids:
1012 r = parse_choose_arg_ids(p, bucket_id, arg);
1013 break;
1014 }
1015 if (r < 0)
1016 return r;
1017 }
1018 return 0;
1019}
1020
1021int CrushCompiler::parse_choose_args(iter_t const& i)
1022{
1023 int choose_arg_index = int_node(i->children[1]);
1024 if (crush.choose_args.find(choose_arg_index) != crush.choose_args.end()) {
1025 err << choose_arg_index << " duplicated" << std::endl;
1026 return -1;
1027 }
1028 crush_choose_arg_map arg_map;
1029 arg_map.size = crush.get_max_buckets();
1030 arg_map.args = (crush_choose_arg *)calloc(arg_map.size, sizeof(crush_choose_arg));
1031 for (iter_t p = i->children.begin() + 2; p != i->children.end(); p++) {
1032 int r = 0;
1033 switch((int)p->value.id().to_long()) {
1034 case crush_grammar::_choose_arg:
1035 r = parse_choose_arg(p, arg_map.args);
1036 break;
1037 }
1038 if (r < 0) {
1039 crush.destroy_choose_args(arg_map);
1040 return r;
1041 }
1042 }
1043 crush.choose_args[choose_arg_index] = arg_map;
1044 return 0;
1045}
1046
1047void CrushCompiler::find_used_bucket_ids(iter_t const& i)
1048{
1049 for (iter_t p = i->children.begin(); p != i->children.end(); p++) {
1050 if ((int)p->value.id().to_long() == crush_grammar::_bucket) {
1051 iter_t firstline = p->children.begin() + 3;
1052 string tag = string_node(firstline->children[0]);
1053 if (tag == "id") {
1054 int id = int_node(firstline->children[1]);
1055 //err << "saw bucket id " << id << std::endl;
1056 id_item[id] = string();
1057 }
1058 }
1059 }
1060}
1061
1062int CrushCompiler::parse_crush(iter_t const& i)
1063{
1064 find_used_bucket_ids(i);
c07f9fc5 1065 bool saw_rule = false;
7c673cae
FG
1066 for (iter_t p = i->children.begin(); p != i->children.end(); p++) {
1067 int r = 0;
1068 switch (p->value.id().to_long()) {
1069 case crush_grammar::_tunable:
1070 r = parse_tunable(p);
1071 break;
1072 case crush_grammar::_device:
1073 r = parse_device(p);
1074 break;
1075 case crush_grammar::_bucket_type:
1076 r = parse_bucket_type(p);
1077 break;
c07f9fc5
FG
1078 case crush_grammar::_bucket:
1079 if (saw_rule) {
1080 err << "buckets must be defined before rules" << std::endl;
1081 return -1;
1082 }
7c673cae
FG
1083 r = parse_bucket(p);
1084 break;
c07f9fc5
FG
1085 case crush_grammar::_crushrule:
1086 if (!saw_rule) {
1087 saw_rule = true;
d2e6a577 1088 crush.populate_classes(class_bucket);
c07f9fc5 1089 }
7c673cae
FG
1090 r = parse_rule(p);
1091 break;
1092 case crush_grammar::_choose_args:
1093 r = parse_choose_args(p);
1094 break;
1095 default:
1096 ceph_abort();
1097 }
1098 if (r < 0) {
1099 return r;
1100 }
1101 }
1102
1103 //err << "max_devices " << crush.get_max_devices() << std::endl;
7c673cae
FG
1104 crush.finalize();
1105
1106 return 0;
1107}
1108
1109// squash runs of whitespace to one space, excepting newlines
1110string CrushCompiler::consolidate_whitespace(string in)
1111{
1112 string out;
1113
1114 bool white = false;
1115 for (unsigned p=0; p<in.length(); p++) {
1116 if (isspace(in[p]) && in[p] != '\n') {
1117 if (white)
1118 continue;
1119 white = true;
1120 } else {
1121 if (white) {
1122 if (out.length()) out += " ";
1123 white = false;
1124 }
1125 out += in[p];
1126 }
1127 }
1128 if (verbose > 3)
1129 err << " \"" << in << "\" -> \"" << out << "\"" << std::endl;
1130 return out;
1131}
1132
1133void CrushCompiler::dump(iter_t const& i, int ind)
1134{
1135 err << "dump";
1136 for (int j=0; j<ind; j++)
1137 cout << "\t";
1138 long id = i->value.id().to_long();
1139 err << id << "\t";
1140 err << "'" << string(i->value.begin(), i->value.end())
1141 << "' " << i->children.size() << " children" << std::endl;
1142 for (unsigned int j = 0; j < i->children.size(); j++)
1143 dump(i->children.begin() + j, ind+1);
1144}
1145
1146/**
1147* This function fix the problem like below
1148* rack using_foo { item foo }
1149* host foo { ... }
1150*
1151* if an item being used by a bucket is defined after that bucket.
1152* CRUSH compiler will create a map by which we can
1153* not identify that item when selecting in that bucket.
1154**/
1155int CrushCompiler::adjust_bucket_item_place(iter_t const &i)
1156{
1157 map<string,set<string> > bucket_items;
1158 map<string,iter_t> bucket_itrer;
1159 vector<string> buckets;
1160 for (iter_t p = i->children.begin(); p != i->children.end(); ++p) {
1161 if ((int)p->value.id().to_long() == crush_grammar::_bucket) {
1162 string name = string_node(p->children[1]);
1163 buckets.push_back(name);
1164 bucket_itrer[name] = p;
1165 //skip non-bucket-item children in the bucket's parse tree
1166 for (unsigned q=3; q < p->children.size()-1; ++q) {
1167 iter_t sub = p->children.begin() + q;
1168 if ((int)sub->value.id().to_long()
1169 == crush_grammar::_bucket_item) {
1170 string iname = string_node(sub->children[1]);
1171 bucket_items[name].insert(iname);
1172 }
1173 }
1174 }
1175 }
1176
1177 //adjust the bucket
1178 for (unsigned i=0; i < buckets.size(); ++i) {
1179 for (unsigned j=i+1; j < buckets.size(); ++j) {
1180 if (bucket_items[buckets[i]].count(buckets[j])) {
1181 if (bucket_items[buckets[j]].count(buckets[i])) {
1182 err << "bucket '" << buckets[i] << "' and bucket '"
1183 << buckets[j] << "' are included each other" << std::endl;
1184 return -1;
1185 } else {
1186 std::iter_swap(bucket_itrer[buckets[i]], bucket_itrer[buckets[j]]);
1187 }
1188 }
1189 }
1190 }
1191
1192 return 0;
1193}
1194
1195int CrushCompiler::compile(istream& in, const char *infn)
1196{
1197 if (!infn)
1198 infn = "<input>";
1199
1200 // always start with legacy tunables, so that the compiled result of
1201 // a given crush file is fixed for all time.
1202 crush.set_tunables_legacy();
1203
1204 string big;
1205 string str;
1206 int line = 1;
1207 map<int,int> line_pos; // pos -> line
1208 map<int,string> line_val;
1209 while (getline(in, str)) {
1210 // remove newline
1211 int l = str.length();
1212 if (l && str[l - 1] == '\n')
1213 str.erase(l-1, 1);
1214
1215 line_val[line] = str;
1216
1217 // strip comment
1218 int n = str.find("#");
1219 if (n >= 0)
1220 str.erase(n, str.length()-n);
1221
1222 if (verbose>1) err << line << ": " << str << std::endl;
1223
1224 // work around spirit crankiness by removing extraneous
1225 // whitespace. there is probably a more elegant solution, but
1226 // this only broke with the latest spirit (with the switchover to
1227 // "classic"), i don't want to spend too much time figuring it
1228 // out.
1229 string stripped = consolidate_whitespace(str);
1230 if (stripped.length() && big.length() && big[big.length()-1] != ' ') big += " ";
1231
1232 line_pos[big.length()] = line;
1233 line++;
1234 big += stripped;
1235 }
1236
1237 if (verbose > 2) err << "whole file is: \"" << big << "\"" << std::endl;
1238
1239 crush_grammar crushg;
1240 const char *start = big.c_str();
1241 //tree_parse_info<const char *> info = ast_parse(start, crushg, space_p);
1242 tree_parse_info<> info = ast_parse(start, crushg, space_p);
1243
1244 // parse error?
1245 if (!info.full) {
1246 int cpos = info.stop - start;
1247 //out << "cpos " << cpos << std::endl;
1248 //out << " linemap " << line_pos << std::endl;
1249 assert(!line_pos.empty());
1250 map<int,int>::iterator p = line_pos.upper_bound(cpos);
1251 if (p != line_pos.begin())
1252 --p;
1253 int line = p->second;
1254 int pos = cpos - p->first;
1255 err << infn << ":" << line //<< ":" << (pos+1)
1256 << " error: parse error at '" << line_val[line].substr(pos) << "'" << std::endl;
1257 return -1;
1258 }
1259
1260 int r = adjust_bucket_item_place(info.trees.begin());
1261 if (r < 0) {
1262 return r;
1263 }
1264 //out << "parsing succeeded\n";
1265 //dump(info.trees.begin());
1266 return parse_crush(info.trees.begin());
1267}